Skip to content

Commit 7c366b0

Browse files
authored
[DirectX] Implement llvm.is.fpclass lowering for the fcNegZero FPClassTest and the IsNaN, IsInf, IsFinite, IsNormal DXIL ops (#138048)
Fixes #137209

This PR:
- Adds a case to `expandIntrinsic()` in `DXILIntrinsicExpansion.cpp` to expand `Intrinsic::is_fpclass` when the FPClassTest is `FPClassTest::fcNegZero`
- Defines the `IsNaN`, `IsFinite`, and `IsNormal` DXIL ops in `DXIL.td`
- Adds a case to `lowerIntrinsics()` in `DXILOpLowering.cpp` to handle the lowering of `Intrinsic::is_fpclass` to the DXIL ops `IsNaN`, `IsInf`, `IsFinite`, and `IsNormal` when the FPClassTest is `fcNan`, `fcInf`, `fcFinite`, and `fcNormal` respectively
- Creates a test, `llvm/test/CodeGen/DirectX/is_fpclass.ll`, to exercise the intrinsic expansion and DXIL op lowering of `Intrinsic::is_fpclass`

~~A separate PR will be made to remove the now-redundant `dx_isinf` intrinsic to address #87777.~~

A proper implementation of the `llvm.is.fpclass` lowering that handles all possible combinations of FPClassTest can be done in a separate PR. This PR's implementation focuses primarily on addressing the current use-cases for DirectML and HLSL intrinsics.
1 parent 45cd708 commit 7c366b0

File tree

4 files changed

+285
-0
lines changed

4 files changed

+285
-0
lines changed

llvm/lib/Target/DirectX/DXIL.td

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,15 @@ def Saturate : DXILOp<7, unary> {
422422
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
423423
}
424424

425+
// DXIL op with opcode 8 in the isSpecialFloat op class. It takes a single
// half or float operand (the overload type) and produces an i1. Unlike IsInf,
// this record lists no `intrinsics` selector.
def IsNaN : DXILOp<8, isSpecialFloat> {
426+
let Doc = "Determines if the specified value is NaN.";
427+
let arguments = [OverloadTy];
428+
let result = Int1Ty;
429+
let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
430+
let stages = [Stages<DXIL1_0, [all_stages]>];
431+
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
432+
}
433+
425434
def IsInf : DXILOp<9, isSpecialFloat> {
426435
let Doc = "Determines if the specified value is infinite.";
427436
let intrinsics = [IntrinSelect<int_dx_isinf>];
@@ -432,6 +441,24 @@ def IsInf : DXILOp<9, isSpecialFloat> {
432441
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
433442
}
434443

444+
// DXIL op with opcode 10 in the isSpecialFloat op class. It takes a single
// half or float operand (the overload type) and produces an i1. This record
// lists no `intrinsics` selector.
def IsFinite : DXILOp<10, isSpecialFloat> {
445+
let Doc = "Determines if the specified value is finite.";
446+
let arguments = [OverloadTy];
447+
let result = Int1Ty;
448+
let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
449+
let stages = [Stages<DXIL1_0, [all_stages]>];
450+
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
451+
}
452+
453+
// DXIL op with opcode 11 in the isSpecialFloat op class. It takes a single
// half or float operand (the overload type) and produces an i1. This record
// lists no `intrinsics` selector.
def IsNormal : DXILOp<11, isSpecialFloat> {
454+
let Doc = "Determines if the specified value is normal.";
455+
let arguments = [OverloadTy];
456+
let result = Int1Ty;
457+
let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
458+
let stages = [Stages<DXIL1_0, [all_stages]>];
459+
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
460+
}
461+
435462
def Cos : DXILOp<12, unary> {
436463
let Doc = "Returns cosine(theta) for theta in radians.";
437464
let intrinsics = [IntrinSelect<int_cos>];

llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ static bool isIntrinsicExpansion(Function &F) {
4646
case Intrinsic::abs:
4747
case Intrinsic::atan2:
4848
case Intrinsic::exp:
49+
case Intrinsic::is_fpclass:
4950
case Intrinsic::log:
5051
case Intrinsic::log10:
5152
case Intrinsic::pow:
@@ -271,6 +272,59 @@ static Value *expandExpIntrinsic(CallInst *Orig) {
271272
return Exp2Call;
272273
}
273274

275+
/// Expand an `llvm.is.fpclass` call whose class mask has no matching DXIL op.
///
/// \param Orig The `llvm.is.fpclass` call. Operand 0 is the floating-point
///             scalar or fixed vector under test; operand 1 is the constant
///             FPClassTest mask.
/// \returns nullptr when the mask is exactly fcInf, fcNan, fcNormal or
///          fcFinite (those have DXIL opcodes and are left for DXIL Op
///          Lowering); otherwise the replacement value for an fcNegZero
///          test. Any other mask is reported as a fatal error.
static Value *expandIsFPClass(CallInst *Orig) {
  // The mask is dereferenced unconditionally below, so use cast<> (which
  // asserts on a non-constant operand) rather than a dyn_cast<> whose null
  // result would be dereferenced silently.
  auto *TCI = cast<ConstantInt>(Orig->getArgOperand(1));
  const uint64_t Mask = TCI->getZExtValue();

  // These FPClassTest cases have DXIL opcodes, so they will be handled in
  // DXIL Op Lowering instead.
  switch (Mask) {
  case FPClassTest::fcInf:
  case FPClassTest::fcNan:
  case FPClassTest::fcNormal:
  case FPClassTest::fcFinite:
    return nullptr;
  default:
    break;
  }

  IRBuilder<> Builder(Orig);

  Value *F = Orig->getArgOperand(0);
  Type *FTy = F->getType();
  unsigned FNumElem = 0; // 0 => F is not a vector

  unsigned BitWidth; // Bit width of F or the ElemTy of F
  Type *BitCastTy;   // An IntNTy of the same bitwidth as F or ElemTy of F

  if (auto *FVecTy = dyn_cast<FixedVectorType>(FTy)) {
    Type *ElemTy = FVecTy->getElementType();
    FNumElem = FVecTy->getNumElements();
    BitWidth = ElemTy->getPrimitiveSizeInBits();
    BitCastTy = FixedVectorType::get(Builder.getIntNTy(BitWidth), FNumElem);
  } else {
    BitWidth = FTy->getPrimitiveSizeInBits();
    BitCastTy = Builder.getIntNTy(BitWidth);
  }

  Value *FBitCast = Builder.CreateBitCast(F, BitCastTy);
  switch (Mask) {
  case FPClassTest::fcNegZero: {
    // Negative zero is the bit pattern with only the sign bit set. Build it
    // as an APInt: `1 << (BitWidth - 1)` would be evaluated in `int`, which
    // overflows for 32-bit operands, is undefined for 64-bit ones, and
    // sign-extends to a value that does not fit the iN type.
    Value *NegZero = ConstantInt::get(Builder.getIntNTy(BitWidth),
                                      APInt::getSignMask(BitWidth));
    if (FNumElem)
      NegZero = Builder.CreateVectorSplat(FNumElem, NegZero);
    return Builder.CreateICmpEQ(FBitCast, NegZero, "is.fpclass.negzero");
  }
  default:
    report_fatal_error(Twine("Unsupported FPClassTest"),
                       /* gen_crash_diag=*/false);
  }
}
327+
274328
static Value *expandAnyOrAllIntrinsic(CallInst *Orig,
275329
Intrinsic::ID IntrinsicId) {
276330
Value *X = Orig->getOperand(0);
@@ -557,6 +611,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
557611
case Intrinsic::exp:
558612
Result = expandExpIntrinsic(Orig);
559613
break;
614+
case Intrinsic::is_fpclass:
615+
Result = expandIsFPClass(Orig);
616+
break;
560617
case Intrinsic::log:
561618
Result = expandLogIntrinsic(Orig);
562619
break;

llvm/lib/Target/DirectX/DXILOpLowering.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,50 @@ class OpLowerer {
739739
});
740740
}
741741

742+
/// Replace every call to the `llvm.is.fpclass` declaration \p F with the
/// DXIL op that corresponds to its class mask.
///
/// The masks fcInf, fcNan, fcNormal and fcFinite map to the IsInf, IsNaN,
/// IsNormal and IsFinite opcodes respectively. Any other mask makes the
/// per-call callback return an "Unsupported FPClassTest" error.
///
/// \returns the result of replaceFunction(); the caller ORs it into its
///          error flag.
[[nodiscard]] bool lowerIsFPClass(Function &F) {
  IRBuilder<> &IRB = OpBuilder.getIRB();
  Type *RetTy = IRB.getInt1Ty();

  return replaceFunction(F, [&](CallInst *CI) -> Error {
    IRB.SetInsertPoint(CI);
    // The DXIL op takes only the value under test; the mask selects the
    // opcode rather than being passed through.
    SmallVector<Value *> Args;
    Args.push_back(CI->getArgOperand(0));

    // The mask is dereferenced unconditionally, so use cast<> (which asserts
    // on a non-constant operand) instead of an unchecked dyn_cast<>.
    auto *TCI = cast<ConstantInt>(CI->getArgOperand(1));
    const uint64_t Mask = TCI->getZExtValue();

    dxil::OpCode OpCode;
    switch (Mask) {
    case FPClassTest::fcInf:
      OpCode = dxil::OpCode::IsInf;
      break;
    case FPClassTest::fcNan:
      OpCode = dxil::OpCode::IsNaN;
      break;
    case FPClassTest::fcNormal:
      OpCode = dxil::OpCode::IsNormal;
      break;
    case FPClassTest::fcFinite:
      OpCode = dxil::OpCode::IsFinite;
      break;
    default: {
      SmallString<128> Msg =
          formatv("Unsupported FPClassTest {0} for DXIL Op Lowering", Mask);
      return make_error<StringError>(Msg, inconvertibleErrorCode());
    }
    }

    Expected<CallInst *> OpCall =
        OpBuilder.tryCreateOp(OpCode, Args, CI->getName(), RetTy);
    if (Error E = OpCall.takeError())
      return E;

    CI->replaceAllUsesWith(*OpCall);
    CI->eraseFromParent();
    return Error::success();
  });
}
785+
742786
bool lowerIntrinsics() {
743787
bool Updated = false;
744788
bool HasErrors = false;
@@ -805,6 +849,9 @@ class OpLowerer {
805849
case Intrinsic::ctpop:
806850
HasErrors |= lowerCtpopToCountBits(F);
807851
break;
852+
case Intrinsic::is_fpclass:
853+
HasErrors |= lowerIsFPClass(F);
854+
break;
808855
}
809856
Updated = true;
810857
}
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
3+
4+
5+
; Class mask 32 selects only negative zero (fcNegZero). The expected output
; has no DXIL op call: the float is bitcast to i32 and compared for equality
; with -2147483648, the pattern with only the sign bit set.
define noundef i1 @isnegzero(float noundef %a) {
6+
; CHECK-LABEL: define noundef i1 @isnegzero(
7+
; CHECK-SAME: float noundef [[A:%.*]]) {
8+
; CHECK-NEXT: [[ENTRY:.*:]]
9+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A]] to i32
10+
; CHECK-NEXT: [[IS_FPCLASS_NEGZERO:%.*]] = icmp eq i32 [[TMP0]], -2147483648
11+
; CHECK-NEXT: ret i1 [[IS_FPCLASS_NEGZERO]]
12+
;
13+
entry:
14+
%0 = call i1 @llvm.is.fpclass.f32(float %a, i32 32)
15+
ret i1 %0
16+
}
17+
18+
; Two-lane vector form of the negative-zero test (class mask 32). The RUN
; line includes -scalarizer, so the expected output bitcasts and compares
; each lane separately and then rebuilds the <2 x i1> result.
define noundef <2 x i1> @isnegzerov2(<2 x float> noundef %a) {
19+
; CHECK-LABEL: define noundef <2 x i1> @isnegzerov2(
20+
; CHECK-SAME: <2 x float> noundef [[A:%.*]]) {
21+
; CHECK-NEXT: [[ENTRY:.*:]]
22+
; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x float> [[A]], i64 0
23+
; CHECK-NEXT: [[DOTI0:%.*]] = bitcast float [[A_I0]] to i32
24+
; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x float> [[A]], i64 1
25+
; CHECK-NEXT: [[DOTI1:%.*]] = bitcast float [[A_I1]] to i32
26+
; CHECK-NEXT: [[IS_FPCLASS_NEGZERO_I0:%.*]] = icmp eq i32 [[DOTI0]], -2147483648
27+
; CHECK-NEXT: [[IS_FPCLASS_NEGZERO_I1:%.*]] = icmp eq i32 [[DOTI1]], -2147483648
28+
; CHECK-NEXT: [[IS_FPCLASS_NEGZERO_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[IS_FPCLASS_NEGZERO_I0]], i64 0
29+
; CHECK-NEXT: [[IS_FPCLASS_NEGZERO:%.*]] = insertelement <2 x i1> [[IS_FPCLASS_NEGZERO_UPTO0]], i1 [[IS_FPCLASS_NEGZERO_I1]], i64 1
30+
; CHECK-NEXT: ret <2 x i1> [[IS_FPCLASS_NEGZERO]]
31+
;
32+
entry:
33+
%0 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %a, i32 32)
34+
ret <2 x i1> %0
35+
}
36+
37+
; Class mask 3 is the NaN test (fcNan). The expected output is a single call
; to the isSpecialFloat DXIL op with opcode 8 (IsNaN).
define noundef i1 @isnan(float noundef %a) {
38+
; CHECK-LABEL: define noundef i1 @isnan(
39+
; CHECK-SAME: float noundef [[A:%.*]]) {
40+
; CHECK-NEXT: [[ENTRY:.*:]]
41+
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 8, float [[A]]) #[[ATTR0:[0-9]+]]
42+
; CHECK-NEXT: ret i1 [[TMP0]]
43+
;
44+
entry:
45+
%0 = call i1 @llvm.is.fpclass.f32(float %a, i32 3)
46+
ret i1 %0
47+
}
48+
49+
; Two-lane vector form of the NaN test (class mask 3). After scalarization
; the expected output calls the isSpecialFloat DXIL op with opcode 8 once
; per lane and rebuilds the <2 x i1> result.
define noundef <2 x i1> @isnanv2(<2 x float> noundef %a) {
50+
; CHECK-LABEL: define noundef <2 x i1> @isnanv2(
51+
; CHECK-SAME: <2 x float> noundef [[A:%.*]]) {
52+
; CHECK-NEXT: [[ENTRY:.*:]]
53+
; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x float> [[A]], i64 0
54+
; CHECK-NEXT: [[DOTI02:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 8, float [[A_I0]]) #[[ATTR0]]
55+
; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x float> [[A]], i64 1
56+
; CHECK-NEXT: [[DOTI11:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 8, float [[A_I1]]) #[[ATTR0]]
57+
; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[DOTI02]], i64 0
58+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i1> [[DOTUPTO0]], i1 [[DOTI11]], i64 1
59+
; CHECK-NEXT: ret <2 x i1> [[TMP0]]
60+
;
61+
entry:
62+
%0 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %a, i32 3)
63+
ret <2 x i1> %0
64+
}
65+
66+
; Class mask 516 is the infinity test (fcInf). The expected output is a
; single call to the isSpecialFloat DXIL op with opcode 9 (IsInf).
define noundef i1 @isinf(float noundef %a) {
67+
; CHECK-LABEL: define noundef i1 @isinf(
68+
; CHECK-SAME: float noundef [[A:%.*]]) {
69+
; CHECK-NEXT: [[ENTRY:.*:]]
70+
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 9, float [[A]]) #[[ATTR0]]
71+
; CHECK-NEXT: ret i1 [[TMP0]]
72+
;
73+
entry:
74+
%0 = call i1 @llvm.is.fpclass.f32(float %a, i32 516)
75+
ret i1 %0
76+
}
77+
78+
; Two-lane vector form of the infinity test (class mask 516). After
; scalarization the expected output calls the isSpecialFloat DXIL op with
; opcode 9 once per lane and rebuilds the <2 x i1> result.
define noundef <2 x i1> @isinfv2(<2 x float> noundef %a) {
79+
; CHECK-LABEL: define noundef <2 x i1> @isinfv2(
80+
; CHECK-SAME: <2 x float> noundef [[A:%.*]]) {
81+
; CHECK-NEXT: [[ENTRY:.*:]]
82+
; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x float> [[A]], i64 0
83+
; CHECK-NEXT: [[DOTI02:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 9, float [[A_I0]]) #[[ATTR0]]
84+
; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x float> [[A]], i64 1
85+
; CHECK-NEXT: [[DOTI11:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 9, float [[A_I1]]) #[[ATTR0]]
86+
; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[DOTI02]], i64 0
87+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i1> [[DOTUPTO0]], i1 [[DOTI11]], i64 1
88+
; CHECK-NEXT: ret <2 x i1> [[TMP0]]
89+
;
90+
entry:
91+
%0 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %a, i32 516)
92+
ret <2 x i1> %0
93+
}
94+
95+
; Class mask 504 is the finite test (fcFinite). The expected output is a
; single call to the isSpecialFloat DXIL op with opcode 10 (IsFinite).
define noundef i1 @isfinite(float noundef %a) {
96+
; CHECK-LABEL: define noundef i1 @isfinite(
97+
; CHECK-SAME: float noundef [[A:%.*]]) {
98+
; CHECK-NEXT: [[ENTRY:.*:]]
99+
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 10, float [[A]]) #[[ATTR0]]
100+
; CHECK-NEXT: ret i1 [[TMP0]]
101+
;
102+
entry:
103+
%0 = call i1 @llvm.is.fpclass.f32(float %a, i32 504)
104+
ret i1 %0
105+
}
106+
107+
; Two-lane vector form of the finite test (class mask 504). After
; scalarization the expected output calls the isSpecialFloat DXIL op with
; opcode 10 once per lane and rebuilds the <2 x i1> result.
define noundef <2 x i1> @isfinitev2(<2 x float> noundef %a) {
108+
; CHECK-LABEL: define noundef <2 x i1> @isfinitev2(
109+
; CHECK-SAME: <2 x float> noundef [[A:%.*]]) {
110+
; CHECK-NEXT: [[ENTRY:.*:]]
111+
; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x float> [[A]], i64 0
112+
; CHECK-NEXT: [[DOTI02:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 10, float [[A_I0]]) #[[ATTR0]]
113+
; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x float> [[A]], i64 1
114+
; CHECK-NEXT: [[DOTI11:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 10, float [[A_I1]]) #[[ATTR0]]
115+
; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[DOTI02]], i64 0
116+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i1> [[DOTUPTO0]], i1 [[DOTI11]], i64 1
117+
; CHECK-NEXT: ret <2 x i1> [[TMP0]]
118+
;
119+
entry:
120+
%0 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %a, i32 504)
121+
ret <2 x i1> %0
122+
}
123+
124+
; Class mask 264 is the normal-number test (fcNormal). The expected output is
; a single call to the isSpecialFloat DXIL op with opcode 11 (IsNormal).
define noundef i1 @isnormal(float noundef %a) {
125+
; CHECK-LABEL: define noundef i1 @isnormal(
126+
; CHECK-SAME: float noundef [[A:%.*]]) {
127+
; CHECK-NEXT: [[ENTRY:.*:]]
128+
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 11, float [[A]]) #[[ATTR0]]
129+
; CHECK-NEXT: ret i1 [[TMP0]]
130+
;
131+
entry:
132+
%0 = call i1 @llvm.is.fpclass.f32(float %a, i32 264)
133+
ret i1 %0
134+
}
135+
136+
; Two-lane vector form of the normal-number test (class mask 264). After
; scalarization the expected output calls the isSpecialFloat DXIL op with
; opcode 11 once per lane and rebuilds the <2 x i1> result.
define noundef <2 x i1> @isnormalv2(<2 x float> noundef %a) {
137+
; CHECK-LABEL: define noundef <2 x i1> @isnormalv2(
138+
; CHECK-SAME: <2 x float> noundef [[A:%.*]]) {
139+
; CHECK-NEXT: [[ENTRY:.*:]]
140+
; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x float> [[A]], i64 0
141+
; CHECK-NEXT: [[DOTI02:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 11, float [[A_I0]]) #[[ATTR0]]
142+
; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x float> [[A]], i64 1
143+
; CHECK-NEXT: [[DOTI11:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 11, float [[A_I1]]) #[[ATTR0]]
144+
; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[DOTI02]], i64 0
145+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i1> [[DOTUPTO0]], i1 [[DOTI11]], i64 1
146+
; CHECK-NEXT: ret <2 x i1> [[TMP0]]
147+
;
148+
entry:
149+
%0 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %a, i32 264)
150+
ret <2 x i1> %0
151+
}
152+
153+
declare i1 @llvm.is.fpclass.f32(float, i32 immarg)
154+
declare <2 x i1> @llvm.is.fpclass.v2f32(<2 x float>, i32 immarg)

0 commit comments

Comments
 (0)