[DirectX] Implement llvm.is.fpclass lowering for the fcNegZero FPClassTest and the IsNaN, IsInf, IsFinite, IsNormal DXIL ops (#138048)

Icohedron · web-flow · commit 7c366b041cd0 · 2025-05-08T09:13:26.000-07:00
Fixes #137209 This PR: - Adds a case to `expandIntrinsic()` in `DXILIntrinsicExpansion.cpp` to expand the `Intrinsic::is_fpclass` in the case of `FPClassTest::fcNegZero` - Defines the `IsNaN`, `IsFinite`, `IsNormal` DXIL ops in `DXIL.td` - Adds a case to `lowerIntrinsics()` in `DXILOpLowering.cpp` to handle the lowering of `Intrinsic::is_fpclass` to the DXIL ops `IsNaN`, `IsInf`, `IsFinite`, `IsNormal` when the FPClassTest is `fcNan`, `fcInf`, `fcFinite`, and `fcNormal` respectively - Creates a test `llvm/test/CodeGen/DirectX/is_fpclass.ll` to exercise the intrinsic expansion and DXIL op lowering of `Intrinsic::is_fpclass` ~~A separate PR will be made to remove the now-redundant `dx_isinf` intrinsic to address #87777.~~ A proper implementation for the lowering of the `llvm.is.fpclass` intrinsic to handle all possible combinations of FPClassTest can be implemented in a separate PR. This PR's implementation focuses primarily on addressing the current use-cases for DirectML and HLSL intrinsics.
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
@@ -422,6 +422,15 @@ def Saturate : DXILOp<7, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
+def IsNaN : DXILOp<8, isSpecialFloat> {
+  let Doc = "Determines if the specified value is NaN.";
+  let arguments = [OverloadTy];
+  let result = Int1Ty;
+  let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+}
+
 def IsInf : DXILOp<9, isSpecialFloat> {
   let Doc = "Determines if the specified value is infinite.";
   let intrinsics = [IntrinSelect<int_dx_isinf>];
@@ -432,6 +441,24 @@ def IsInf : DXILOp<9, isSpecialFloat> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
+def IsFinite : DXILOp<10, isSpecialFloat> {
+  let Doc = "Determines if the specified value is finite.";
+  let arguments = [OverloadTy];
+  let result = Int1Ty;
+  let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+}
+
+def IsNormal : DXILOp<11, isSpecialFloat> {
+  let Doc = "Determines if the specified value is normal.";
+  let arguments = [OverloadTy];
+  let result = Int1Ty;
+  let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+}
+
 def Cos : DXILOp<12, unary> {
   let Doc = "Returns cosine(theta) for theta in radians.";
   let intrinsics = [IntrinSelect<int_cos>];
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -46,6 +46,7 @@ static bool isIntrinsicExpansion(Function &F) {
   case Intrinsic::abs:
   case Intrinsic::atan2:
   case Intrinsic::exp:
+  case Intrinsic::is_fpclass:
   case Intrinsic::log:
   case Intrinsic::log10:
   case Intrinsic::pow:
@@ -271,6 +272,59 @@ static Value *expandExpIntrinsic(CallInst *Orig) {
   return Exp2Call;
 }
 
+static Value *expandIsFPClass(CallInst *Orig) {
+  Value *T = Orig->getArgOperand(1);
+  auto *TCI = dyn_cast<ConstantInt>(T);
+
+  // These FPClassTest cases have DXIL opcodes, so they will be handled in
+  // DXIL Op Lowering instead.
+  switch (TCI->getZExtValue()) {
+  case FPClassTest::fcInf:
+  case FPClassTest::fcNan:
+  case FPClassTest::fcNormal:
+  case FPClassTest::fcFinite:
+    return nullptr;
+  }
+
+  IRBuilder<> Builder(Orig);
+
+  Value *F = Orig->getArgOperand(0);
+  Type *FTy = F->getType();
+  unsigned FNumElem = 0; // 0 => F is not a vector
+
+  unsigned BitWidth; // Bit width of F or the ElemTy of F
+  Type *BitCastTy;   // An IntNTy of the same bitwidth as F or ElemTy of F
+
+  if (auto *FVecTy = dyn_cast<FixedVectorType>(FTy)) {
+    Type *ElemTy = FVecTy->getElementType();
+    FNumElem = FVecTy->getNumElements();
+    BitWidth = ElemTy->getPrimitiveSizeInBits();
+    BitCastTy = FixedVectorType::get(Builder.getIntNTy(BitWidth), FNumElem);
+  } else {
+    BitWidth = FTy->getPrimitiveSizeInBits();
+    BitCastTy = Builder.getIntNTy(BitWidth);
+  }
+
+  Value *FBitCast = Builder.CreateBitCast(F, BitCastTy);
+  switch (TCI->getZExtValue()) {
+  case FPClassTest::fcNegZero: {
+    Value *NegZero =
+        ConstantInt::get(Builder.getIntNTy(BitWidth), 1 << (BitWidth - 1));
+    Value *RetVal;
+    if (FNumElem) {
+      Value *NegZeroSplat = Builder.CreateVectorSplat(FNumElem, NegZero);
+      RetVal =
+          Builder.CreateICmpEQ(FBitCast, NegZeroSplat, "is.fpclass.negzero");
+    } else
+      RetVal = Builder.CreateICmpEQ(FBitCast, NegZero, "is.fpclass.negzero");
+    return RetVal;
+  }
+  default:
+    report_fatal_error(Twine("Unsupported FPClassTest"),
+                       /* gen_crash_diag=*/false);
+  }
+}
+
 static Value *expandAnyOrAllIntrinsic(CallInst *Orig,
                                       Intrinsic::ID IntrinsicId) {
   Value *X = Orig->getOperand(0);
@@ -557,6 +611,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
   case Intrinsic::exp:
     Result = expandExpIntrinsic(Orig);
     break;
+  case Intrinsic::is_fpclass:
+    Result = expandIsFPClass(Orig);
+    break;
   case Intrinsic::log:
     Result = expandLogIntrinsic(Orig);
     break;
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -739,6 +739,50 @@ class OpLowerer {
     });
   }
 
+  [[nodiscard]] bool lowerIsFPClass(Function &F) {
+    IRBuilder<> &IRB = OpBuilder.getIRB();
+    Type *RetTy = IRB.getInt1Ty();
+
+    return replaceFunction(F, [&](CallInst *CI) -> Error {
+      IRB.SetInsertPoint(CI);
+      SmallVector<Value *> Args;
+      Value *Fl = CI->getArgOperand(0);
+      Args.push_back(Fl);
+
+      dxil::OpCode OpCode;
+      Value *T = CI->getArgOperand(1);
+      auto *TCI = dyn_cast<ConstantInt>(T);
+      switch (TCI->getZExtValue()) {
+      case FPClassTest::fcInf:
+        OpCode = dxil::OpCode::IsInf;
+        break;
+      case FPClassTest::fcNan:
+        OpCode = dxil::OpCode::IsNaN;
+        break;
+      case FPClassTest::fcNormal:
+        OpCode = dxil::OpCode::IsNormal;
+        break;
+      case FPClassTest::fcFinite:
+        OpCode = dxil::OpCode::IsFinite;
+        break;
+      default:
+        SmallString<128> Msg =
+            formatv("Unsupported FPClassTest {0} for DXIL Op Lowering",
+                    TCI->getZExtValue());
+        return make_error<StringError>(Msg, inconvertibleErrorCode());
+      }
+
+      Expected<CallInst *> OpCall =
+          OpBuilder.tryCreateOp(OpCode, Args, CI->getName(), RetTy);
+      if (Error E = OpCall.takeError())
+        return E;
+
+      CI->replaceAllUsesWith(*OpCall);
+      CI->eraseFromParent();
+      return Error::success();
+    });
+  }
+
   bool lowerIntrinsics() {
     bool Updated = false;
     bool HasErrors = false;
@@ -805,6 +849,9 @@ class OpLowerer {
       case Intrinsic::ctpop:
         HasErrors |= lowerCtpopToCountBits(F);
         break;
+      case Intrinsic::is_fpclass:
+        HasErrors |= lowerIsFPClass(F);
+        break;
       }
       Updated = true;
     }
diff --git a/llvm/test/CodeGen/DirectX/is_fpclass.ll b/llvm/test/CodeGen/DirectX/is_fpclass.ll
@@ -0,0 +1,154 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+
+define noundef i1 @isnegzero(float noundef %a) {
+; CHECK-LABEL: define noundef i1 @isnegzero(
+; CHECK-SAME: float noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[A]] to i32
+; CHECK-NEXT:    [[IS_FPCLASS_NEGZERO:%.*]] = icmp eq i32 [[TMP0]], -2147483648
+; CHECK-NEXT:    ret i1 [[IS_FPCLASS_NEGZERO]]
+;
+entry:
+  %0 = call i1 @llvm.is.fpclass.f32(float %a, i32 32)
+  ret i1 %0
+}
+
+define noundef <2 x i1> @isnegzerov2(<2 x float> noundef %a) {
+; CHECK-LABEL: define noundef <2 x i1> @isnegzerov2(
+; CHECK-SAME: <2 x float> noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <2 x float> [[A]], i64 0
+; CHECK-NEXT:    [[DOTI0:%.*]] = bitcast float [[A_I0]] to i32
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <2 x float> [[A]], i64 1
+; CHECK-NEXT:    [[DOTI1:%.*]] = bitcast float [[A_I1]] to i32
+; CHECK-NEXT:    [[IS_FPCLASS_NEGZERO_I0:%.*]] = icmp eq i32 [[DOTI0]], -2147483648
+; CHECK-NEXT:    [[IS_FPCLASS_NEGZERO_I1:%.*]] = icmp eq i32 [[DOTI1]], -2147483648
+; CHECK-NEXT:    [[IS_FPCLASS_NEGZERO_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[IS_FPCLASS_NEGZERO_I0]], i64 0
+; CHECK-NEXT:    [[IS_FPCLASS_NEGZERO:%.*]] = insertelement <2 x i1> [[IS_FPCLASS_NEGZERO_UPTO0]], i1 [[IS_FPCLASS_NEGZERO_I1]], i64 1
+; CHECK-NEXT:    ret <2 x i1> [[IS_FPCLASS_NEGZERO]]
+;
+entry:
+  %0 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %a, i32 32)
+  ret <2 x i1> %0
+}
+
+define noundef i1 @isnan(float noundef %a) {
+; CHECK-LABEL: define noundef i1 @isnan(
+; CHECK-SAME: float noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 8, float [[A]]) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT:    ret i1 [[TMP0]]
+;
+entry:
+  %0 = call i1 @llvm.is.fpclass.f32(float %a, i32 3)
+  ret i1 %0
+}
+
+define noundef <2 x i1> @isnanv2(<2 x float> noundef %a) {
+; CHECK-LABEL: define noundef <2 x i1> @isnanv2(
+; CHECK-SAME: <2 x float> noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <2 x float> [[A]], i64 0
+; CHECK-NEXT:    [[DOTI02:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 8, float [[A_I0]]) #[[ATTR0]]
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <2 x float> [[A]], i64 1
+; CHECK-NEXT:    [[DOTI11:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 8, float [[A_I1]]) #[[ATTR0]]
+; CHECK-NEXT:    [[DOTUPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[DOTI02]], i64 0
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i1> [[DOTUPTO0]], i1 [[DOTI11]], i64 1
+; CHECK-NEXT:    ret <2 x i1> [[TMP0]]
+;
+entry:
+  %0 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %a, i32 3)
+  ret <2 x i1> %0
+}
+
+define noundef i1 @isinf(float noundef %a) {
+; CHECK-LABEL: define noundef i1 @isinf(
+; CHECK-SAME: float noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 9, float [[A]]) #[[ATTR0]]
+; CHECK-NEXT:    ret i1 [[TMP0]]
+;
+entry:
+  %0 = call i1 @llvm.is.fpclass.f32(float %a, i32 516)
+  ret i1 %0
+}
+
+define noundef <2 x i1> @isinfv2(<2 x float> noundef %a) {
+; CHECK-LABEL: define noundef <2 x i1> @isinfv2(
+; CHECK-SAME: <2 x float> noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <2 x float> [[A]], i64 0
+; CHECK-NEXT:    [[DOTI02:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 9, float [[A_I0]]) #[[ATTR0]]
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <2 x float> [[A]], i64 1
+; CHECK-NEXT:    [[DOTI11:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 9, float [[A_I1]]) #[[ATTR0]]
+; CHECK-NEXT:    [[DOTUPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[DOTI02]], i64 0
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i1> [[DOTUPTO0]], i1 [[DOTI11]], i64 1
+; CHECK-NEXT:    ret <2 x i1> [[TMP0]]
+;
+entry:
+  %0 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %a, i32 516)
+  ret <2 x i1> %0
+}
+
+define noundef i1 @isfinite(float noundef %a) {
+; CHECK-LABEL: define noundef i1 @isfinite(
+; CHECK-SAME: float noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 10, float [[A]]) #[[ATTR0]]
+; CHECK-NEXT:    ret i1 [[TMP0]]
+;
+entry:
+  %0 = call i1 @llvm.is.fpclass.f32(float %a, i32 504)
+  ret i1 %0
+}
+
+define noundef <2 x i1> @isfinitev2(<2 x float> noundef %a) {
+; CHECK-LABEL: define noundef <2 x i1> @isfinitev2(
+; CHECK-SAME: <2 x float> noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <2 x float> [[A]], i64 0
+; CHECK-NEXT:    [[DOTI02:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 10, float [[A_I0]]) #[[ATTR0]]
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <2 x float> [[A]], i64 1
+; CHECK-NEXT:    [[DOTI11:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 10, float [[A_I1]]) #[[ATTR0]]
+; CHECK-NEXT:    [[DOTUPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[DOTI02]], i64 0
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i1> [[DOTUPTO0]], i1 [[DOTI11]], i64 1
+; CHECK-NEXT:    ret <2 x i1> [[TMP0]]
+;
+entry:
+  %0 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %a, i32 504)
+  ret <2 x i1> %0
+}
+
+define noundef i1 @isnormal(float noundef %a) {
+; CHECK-LABEL: define noundef i1 @isnormal(
+; CHECK-SAME: float noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 11, float [[A]]) #[[ATTR0]]
+; CHECK-NEXT:    ret i1 [[TMP0]]
+;
+entry:
+  %0 = call i1 @llvm.is.fpclass.f32(float %a, i32 264)
+  ret i1 %0
+}
+
+define noundef <2 x i1> @isnormalv2(<2 x float> noundef %a) {
+; CHECK-LABEL: define noundef <2 x i1> @isnormalv2(
+; CHECK-SAME: <2 x float> noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <2 x float> [[A]], i64 0
+; CHECK-NEXT:    [[DOTI02:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 11, float [[A_I0]]) #[[ATTR0]]
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <2 x float> [[A]], i64 1
+; CHECK-NEXT:    [[DOTI11:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 11, float [[A_I1]]) #[[ATTR0]]
+; CHECK-NEXT:    [[DOTUPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[DOTI02]], i64 0
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i1> [[DOTUPTO0]], i1 [[DOTI11]], i64 1
+; CHECK-NEXT:    ret <2 x i1> [[TMP0]]
+;
+entry:
+  %0 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %a, i32 264)
+  ret <2 x i1> %0
+}
+
+declare i1 @llvm.is.fpclass.f32(float, i32 immarg)
+declare <2 x i1> @llvm.is.fpclass.v2f32(<2 x float>, i32 immarg)