[NVPTX] Further refactor intrinsic definitions to remove redundancy (NFC) #139924

AlexMaclean · 2025-05-14T15:36:57Z

Note: the diff indicates this change has no impact on the intrinsic code generated by table-gen.

llvmbot · 2025-05-14T15:37:29Z

@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-backend-nvptx

Author: Alex MacLean (AlexMaclean)

Changes

Note: the diff indicates this change has no impact on the intrinsic code generated by table-gen.

Patch is 165.41 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139924.diff

1 Files Affected:

(modified) llvm/include/llvm/IR/IntrinsicsNVVM.td (+643-2384)

diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 0b26bb9829005..3e3a55c05a9e0 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -357,38 +357,33 @@ class MMA_SIGNATURE<WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
      !ne(A.ptx_elt_type, B.ptx_elt_type): [A, B],
      true: [A]
      );
-   string ret = !foldl("", id_frags, a, b, !strconcat(a, ".", b.ptx_elt_type));
+   string ret = !foldl("", id_frags, a, b, !strconcat(a, "_", b.ptx_elt_type));
 }
 
 class WMMA_NAME<string ALayout, string BLayout, int Satfinite, string Rnd, string b1op,
                 WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
   string signature = MMA_SIGNATURE<A, B, C, D>.ret;
-  string llvm = "llvm.nvvm.wmma."
-                # A.geom
-                # ".mma"
-                # b1op
-                # "." # ALayout
-                # "." # BLayout
-                # !if(!ne(Rnd, ""), !strconcat(".", Rnd), "")
-                # signature
-                # !if(Satfinite, ".satfinite", "");
-
-  string record = !subst(".", "_",
-                  !subst("llvm.", "int_", llvm));
+  string record = "int_nvvm_wmma_"
+                  # A.geom
+                  # "_mma"
+                  # b1op
+                  # "_" # ALayout
+                  # "_" # BLayout
+                  # !if(!ne(Rnd, ""), !strconcat("_", Rnd), "")
+                  # signature
+                  # !if(Satfinite, "_satfinite", "");
 }
 
 class MMA_NAME<string ALayout, string BLayout, int Satfinite, string b1op,
                WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
   string signature = MMA_SIGNATURE<A, B, C, D>.ret;
-  string llvm = "llvm.nvvm.mma"
-                # b1op
-                # "." # A.geom
-                # "." # ALayout
-                # "." # BLayout
-                # !if(Satfinite, ".satfinite", "")
-                # signature;
-  string record = !subst(".", "_",
-                  !subst("llvm.", "int_", llvm));
+  string record = "int_nvvm_mma"
+                  # b1op
+                  # "_" # A.geom
+                  # "_" # ALayout
+                  # "_" # BLayout
+                  # !if(Satfinite, "_satfinite", "")
+                  # signature;
 }
 
 class LDMATRIX_NAME<WMMA_REGS Frag, int Trans> {
@@ -602,7 +597,7 @@ class NVVM_WMMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_
 
 class NVVM_MMA_B1OPS<list<WMMA_REGS> frags> {
   list<string> ret = !cond(
-    !eq(frags[0].ptx_elt_type, "b1") : [".xor.popc", ".and.popc"],
+    !eq(frags[0].ptx_elt_type, "b1") : ["_xor_popc", "_and_popc"],
     true: [""]
   );
 }
@@ -696,101 +691,6 @@ class SHFL_INFO<bit sync, string mode, string type, bit return_pred> {
     [OpType, llvm_i32_ty, llvm_i32_ty]);
 }
 
-class CP_ASYNC_BULK_TENSOR_G2S_INTR<int dim, string mode> {
-  string Name = "int_nvvm_cp_async_bulk_tensor_g2s_" # mode # "_" # dim # "d";
-
-  bit IsIm2Col = !if(!eq(mode, "im2col"), 1, 0);
-  int NumIm2ColOffsets = !if(IsIm2Col, !add(dim, -2), 0);
-  list<LLVMType> Im2ColOffsetsTy = !listsplat(llvm_i16_ty, NumIm2ColOffsets);
-  list<LLVMType> TensorDimsTy = !listsplat(llvm_i32_ty, dim);
-  list<LLVMType> ArgsTy = !listconcat(
-                          [llvm_shared_cluster_ptr_ty,  // dst_shared_cluster_ptr
-                           llvm_shared_ptr_ty,          // mbarrier_smem_ptr
-                           llvm_ptr_ty],                // tensormap_ptr
-                           TensorDimsTy,                // actual tensor dims
-                           Im2ColOffsetsTy,             // im2col offsets
-                          [llvm_i16_ty,                 // cta_mask
-                           llvm_i64_ty,                 // cache_hint
-                           llvm_i1_ty,                  // Flag for cta_mask
-                           llvm_i1_ty]                  // Flag for cache_hint
-                          );
-
-  int TempFlagsStartIdx = !add(dim, 5);
-  int FlagsStartIdx = !add(TempFlagsStartIdx, NumIm2ColOffsets);
-  list<IntrinsicProperty> IntrProp = [IntrConvergent,
-        WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>,
-        NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, NoCapture<ArgIndex<2>>,
-        ImmArg<ArgIndex<FlagsStartIdx>>,
-        ImmArg<ArgIndex<!add(FlagsStartIdx, 1)>>];
-}
-
-class CP_ASYNC_BULK_TENSOR_S2G_INTR<int dim, string mode> {
-  string Name = "int_nvvm_cp_async_bulk_tensor_s2g_" # mode # "_" # dim # "d";
-
-  list<LLVMType> TensorDimsTy = !listsplat(llvm_i32_ty, dim);
-  list<LLVMType> ArgsTy = !listconcat(
-                          [llvm_shared_ptr_ty,  // src_smem_ptr
-                           llvm_ptr_ty],        // tensormap_ptr
-                           TensorDimsTy,        // actual tensor dims
-                          [llvm_i64_ty,         // cache_hint
-                           llvm_i1_ty]          // Flag for cache_hint
-                          );
-  int FlagsStartIdx = !add(dim, 3);
-  list<IntrinsicProperty> IntrProp = [IntrConvergent,
-        ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
-        NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
-        ImmArg<ArgIndex<FlagsStartIdx>>];
-}
-
-class CP_ASYNC_BULK_TENSOR_PREFETCH_INTR<int dim, string mode> {
-  string Name = "int_nvvm_cp_async_bulk_tensor_prefetch_" # mode # "_" # dim # "d";
-
-  bit IsIm2Col = !if(!eq(mode, "im2col"), 1, 0);
-  int NumIm2ColOffsets = !if(IsIm2Col, !add(dim, -2), 0);
-  list<LLVMType> Im2ColOffsetsTy = !listsplat(llvm_i16_ty, NumIm2ColOffsets);
-  list<LLVMType> TensorDimsTy = !listsplat(llvm_i32_ty, dim);
-  list<LLVMType> ArgsTy = !listconcat(
-                          [llvm_ptr_ty],     // tensormap_ptr
-                           TensorDimsTy,     // actual tensor dims
-                           Im2ColOffsetsTy,  // im2col offsets
-                          [llvm_i64_ty,      // cache_hint
-                           llvm_i1_ty]       // Flag for cache_hint
-                          );
-
-  int TempFlagsStartIdx = !add(dim, 2);
-  int FlagsStartIdx = !add(TempFlagsStartIdx, NumIm2ColOffsets);
-  list<IntrinsicProperty> IntrProp = [IntrConvergent,
-        ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
-        ImmArg<ArgIndex<FlagsStartIdx>>];
-}
-
-class CP_ASYNC_BULK_TENSOR_REDUCE_INTR<int dim, string mode, string op> {
-  string Suffix = op # "_" # mode # "_" # dim # "d";
-  string Name = "int_nvvm_cp_async_bulk_tensor_reduce_" # Suffix;
-
-  list<LLVMType> TensorDimsTy = !listsplat(llvm_i32_ty, dim);
-  list<LLVMType> ArgsTy = !listconcat(
-                          [llvm_shared_ptr_ty,  // src_smem_ptr
-                           llvm_ptr_ty],        // tensormap_ptr
-                           TensorDimsTy,        // actual tensor dims
-                          [llvm_i64_ty,         // cache_hint
-                           llvm_i1_ty]          // Flag for cache_hint
-                          );
-  int FlagsStartIdx = !add(dim, 3);
-  list<IntrinsicProperty> IntrProp = [IntrConvergent,
-        ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
-        NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
-        ImmArg<ArgIndex<FlagsStartIdx>>];
-}
-
-class NVVM_TCGEN05_LDST_NAME<string Op, string Shape, int Num> {
-  string intr =   "llvm.nvvm.tcgen05." # Op
-                  # "." # Shape
-                  # "." # "x" # !shl(1, Num);
-
-  string record = !subst(".", "_",
-                  !subst("llvm.", "int_", intr));
-}
 class NVVM_TCGEN05_LDST_ACCESS_SIZE<string Shape, int Num> {
   int shift = !cond(!eq(Shape, "16x128b"): 1,
                     !eq(Shape, "16x256b"): 2,
@@ -810,6 +710,28 @@ class NVVM_TCGEN05_LDST_ACCESS_SIZE<string Shape, int Num> {
                         true : llvm_void_ty);
 }
 
+class TexVector<string name, list<LLVMType> types> {
+  string Name = name;
+  list<LLVMType> Types = types;
+}
+
+def TV_I8 : TexVector<"i8", [llvm_i16_ty]>;
+def TV_I16 : TexVector<"i16", [llvm_i16_ty]>;
+def TV_I32 : TexVector<"i32", [llvm_i32_ty]>;
+def TV_I64 : TexVector<"i64", [llvm_i64_ty]>;
+def TV_V2I8 : TexVector<"v2i8", !listsplat(llvm_i16_ty, 2)>;
+def TV_V2I16 : TexVector<"v2i16", !listsplat(llvm_i16_ty, 2)>;
+def TV_V2I32 : TexVector<"v2i32", !listsplat(llvm_i32_ty, 2)>;
+def TV_V2I64 : TexVector<"v2i64", !listsplat(llvm_i64_ty, 2)>;
+def TV_V4I8 : TexVector<"v4i8", !listsplat(llvm_i16_ty, 4)>;
+def TV_V4I16 : TexVector<"v4i16", !listsplat(llvm_i16_ty, 4)>;
+def TV_V4I32 : TexVector<"v4i32", !listsplat(llvm_i32_ty, 4)>;
+
+
+def V4F32 : TexVector<"v4f32", !listsplat(llvm_float_ty, 4)>;
+def V4S32 : TexVector<"v4s32", !listsplat(llvm_i32_ty, 4)>;
+def V4U32 : TexVector<"v4u32", !listsplat(llvm_i32_ty, 4)>;
+
 class NVVMBuiltin :
   ClangBuiltin<!strconcat("__", !substr(NAME, !size("int_")))> {
     assert !eq(!substr(NAME, 0, !size("int_nvvm_")), "int_nvvm_"),
@@ -828,131 +750,116 @@ let TargetPrefix = "nvvm" in {
 //
 // Min Max
 //
+  let IntrProperties = [IntrNoMem, IntrSpeculatable, Commutative] in {
+    foreach operation = ["min", "max"] in {
+      def int_nvvm_f # operation # _d : NVVMBuiltin,
+        DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty]>;
 
-  foreach operation = ["min", "max"] in {
-    def int_nvvm_f # operation # _d : NVVMBuiltin,
-      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
-        [IntrNoMem, IntrSpeculatable, Commutative]>;
+      foreach variant = ["", "_xorsign_abs"] in {
+        foreach nan = ["", "_nan"] in {
+          foreach ftz = ["", "_ftz"] in {
+            def int_nvvm_f # operation # ftz # nan # variant # _f : NVVMBuiltin,
+              DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]>;
 
-    foreach variant = ["", "_xorsign_abs"] in {
-      foreach nan = ["", "_nan"] in {
-        foreach ftz = ["", "_ftz"] in {
-          def int_nvvm_f # operation # ftz # nan # variant # _f : NVVMBuiltin,
-            DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
-              [IntrNoMem, IntrSpeculatable, Commutative]>;
+            def int_nvvm_f # operation # ftz # nan # variant # _f16 :
+              DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty, llvm_half_ty]>;
 
-          def int_nvvm_f # operation # ftz # nan # variant # _f16 :
-            DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty, llvm_half_ty],
-              [IntrNoMem, IntrSpeculatable, Commutative]>;
+            def int_nvvm_f # operation # ftz # nan # variant # _f16x2 :
+              DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty, llvm_v2f16_ty]>;
 
-          def int_nvvm_f # operation # ftz # nan # variant # _f16x2 :
-            DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty, llvm_v2f16_ty],
-              [IntrNoMem, IntrSpeculatable, Commutative]>;
+            def int_nvvm_f # operation # ftz # nan # variant # _bf16 : NVVMBuiltin,
+              DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty, llvm_bfloat_ty]>;
 
-          def int_nvvm_f # operation # ftz # nan # variant # _bf16 : NVVMBuiltin,
-            DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty, llvm_bfloat_ty],
-              [IntrNoMem, IntrSpeculatable, Commutative]>;
-
-          def int_nvvm_f # operation # ftz # nan # variant # _bf16x2 : NVVMBuiltin,
-            DefaultAttrsIntrinsic<[llvm_v2bf16_ty], [llvm_v2bf16_ty, llvm_v2bf16_ty],
-              [IntrNoMem, IntrSpeculatable, Commutative]>;
-        } // ftz
-      } // nan
-    } // variant
-  } // operation
+            def int_nvvm_f # operation # ftz # nan # variant # _bf16x2 : NVVMBuiltin,
+              DefaultAttrsIntrinsic<[llvm_v2bf16_ty], [llvm_v2bf16_ty, llvm_v2bf16_ty]>;
+          } // ftz
+        } // nan
+      } // variant
+    } // operation
+  }
 
 //
 // Multiplication
 //
+  let IntrProperties = [IntrNoMem, IntrSpeculatable, Commutative] in {
+    foreach sign = ["", "u"] in {
+      def int_nvvm_mulhi_ # sign # s : NVVMBuiltin,
+          DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty]>;
 
-  foreach sign = ["", "u"] in {
-    def int_nvvm_mulhi_ # sign # s : NVVMBuiltin,
-        DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
-          [IntrNoMem, IntrSpeculatable, Commutative]>;
-
-    def int_nvvm_mulhi_ # sign # i : NVVMBuiltin,
-        DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
-          [IntrNoMem, IntrSpeculatable, Commutative]>;
+      def int_nvvm_mulhi_ # sign # i : NVVMBuiltin,
+          DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>;
 
-    def int_nvvm_mulhi_ # sign # ll : NVVMBuiltin,
-        DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
-          [IntrNoMem, IntrSpeculatable, Commutative]>;
+      def int_nvvm_mulhi_ # sign # ll : NVVMBuiltin,
+          DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty]>;
 
-    def int_nvvm_mul24_ # sign # i : NVVMBuiltin,
-      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
-        [IntrNoMem, IntrSpeculatable, Commutative]>;
-  }
+      def int_nvvm_mul24_ # sign # i : NVVMBuiltin,
+        DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>;
+    }
 
-  foreach rnd = ["rn", "rz", "rm", "rp"] in {
-    foreach ftz = ["", "_ftz"] in
-      def int_nvvm_mul_ # rnd # ftz # _f : NVVMBuiltin,
-          DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
-            [IntrNoMem, IntrSpeculatable, Commutative]>;
+    foreach rnd = ["rn", "rz", "rm", "rp"] in {
+      foreach ftz = ["", "_ftz"] in
+        def int_nvvm_mul_ # rnd # ftz # _f : NVVMBuiltin,
+            DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]>;
 
-    def int_nvvm_mul_ # rnd # _d : NVVMBuiltin,
-        DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
-          [IntrNoMem, IntrSpeculatable, Commutative]>;
+      def int_nvvm_mul_ # rnd # _d : NVVMBuiltin,
+          DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty]>;
+    }
   }
 
 //
 // Div
 //
+  let IntrProperties = [IntrNoMem] in {
+    foreach ftz = ["", "_ftz"] in {
+      def int_nvvm_div_approx # ftz # _f : NVVMBuiltin,
+          DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]>;
 
-  foreach ftz = ["", "_ftz"] in {
-    def int_nvvm_div_approx # ftz # _f : NVVMBuiltin,
-        DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
-          [IntrNoMem]>;
-
-    def int_nvvm_div_full # ftz : NVVMBuiltin,
-        DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
-          [IntrNoMem]>;
-  }
+      def int_nvvm_div_full # ftz : NVVMBuiltin,
+          DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]>;
+    }
 
-  foreach rnd = ["rn", "rz", "rm", "rp"] in {
-    foreach ftz = ["", "_ftz"] in
-      def int_nvvm_div_ # rnd # ftz # _f : NVVMBuiltin,
-          DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
-            [IntrNoMem]>;
+    foreach rnd = ["rn", "rz", "rm", "rp"] in {
+      foreach ftz = ["", "_ftz"] in
+        def int_nvvm_div_ # rnd # ftz # _f : NVVMBuiltin,
+            DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]>;
 
-    def int_nvvm_div_ # rnd # _d : NVVMBuiltin,
-        DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
-          [IntrNoMem]>;
+      def int_nvvm_div_ # rnd # _d : NVVMBuiltin,
+          DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty]>;
+    }
   }
 
 //
 // Sad
 //
+  let IntrProperties = [IntrNoMem, Commutative, IntrSpeculatable] in {
+    foreach sign = ["", "u"] in {
+      def int_nvvm_sad_ # sign # s : NVVMBuiltin,
+          DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty]>;
 
-  foreach sign = ["", "u"] in {
-    def int_nvvm_sad_ # sign # s : NVVMBuiltin,
-        DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
-          [IntrNoMem, Commutative, IntrSpeculatable]>;
-
-    def int_nvvm_sad_ # sign # i : NVVMBuiltin,
-        DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-          [IntrNoMem, Commutative, IntrSpeculatable]>;
+      def int_nvvm_sad_ # sign # i : NVVMBuiltin,
+          DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
 
-    def int_nvvm_sad_ # sign # ll : NVVMBuiltin,
-        DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty],
-          [IntrNoMem, Commutative, IntrSpeculatable]>;
+      def int_nvvm_sad_ # sign # ll : NVVMBuiltin,
+          DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty]>;
+    }
   }
 
 //
 // Floor  Ceil
 //
-
-  foreach op = ["floor", "ceil"] in {
-    foreach ftz = ["", "_ftz"] in
-      def int_nvvm_ # op # ftz # _f : NVVMBuiltin,
-          DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
-    def int_nvvm_ # op # _d : NVVMBuiltin,
-        DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
+  let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
+    foreach op = ["floor", "ceil"] in {
+      foreach ftz = ["", "_ftz"] in
+        def int_nvvm_ # op # ftz # _f : NVVMBuiltin,
+            DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
+      def int_nvvm_ # op # _d : NVVMBuiltin,
+          DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
+    }
   }
 
 //
 // Abs
 //
-
   foreach ftz = ["", "_ftz"] in
     def int_nvvm_fabs # ftz :
       DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>],
@@ -961,7 +868,6 @@ let TargetPrefix = "nvvm" in {
 //
 // Abs, Neg bf16, bf16x2
 //
-
   def int_nvvm_neg_bf16 : NVVMBuiltin,
     DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty], [IntrNoMem]>;
   def int_nvvm_neg_bf16x2 : NVVMBuiltin,
@@ -970,62 +876,65 @@ let TargetPrefix = "nvvm" in {
 //
 // Round
 //
+  let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
+    foreach ftz = ["", "_ftz"] in
+      def int_nvvm_round # ftz # _f : NVVMBuiltin,
+          DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
 
-  foreach ftz = ["", "_ftz"] in
-    def int_nvvm_round # ftz # _f : NVVMBuiltin,
-      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
-
-  def int_nvvm_round_d : NVVMBuiltin,
-      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
+    def int_nvvm_round_d : NVVMBuiltin,
+        DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
+  }
 
 //
 // Trunc
 //
+  let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
+    foreach ftz = ["", "_ftz"] in
+      def int_nvvm_trunc # ftz # _f : NVVMBuiltin,
+          DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
 
-  foreach ftz = ["", "_ftz"] in
-    def int_nvvm_trunc # ftz # _f : NVVMBuiltin,
-        DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
-
-  def int_nvvm_trunc_d : NVVMBuiltin,
-      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
+    def int_nvvm_trunc_d : NVVMBuiltin,
+        DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
+  }
 
 //
 // Saturate
 //
+  let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
+    foreach ftz = ["", "_ftz"] in
+      def int_nvvm_saturate # ftz # _f : NVVMBuiltin,
+          DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
 
-  foreach ftz = ["", "_ftz"] in
-    def int_nvvm_saturate # ftz # _f : NVVMBuiltin,
-        DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
-
-  def int_nvvm_saturate_d : NVVMBuiltin,
-      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
+    def int_nvvm_saturate_d : NVVMBuiltin,
+        DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
+  }
 
 //
 // Exp2  Log2
 //
+  let IntrProperties = [IntrNoMem] in {
+    foreach ftz = ["", "_ftz"] in
+      def int_nvvm_ex2_approx # ftz # _f : NVVMBuiltin,
+          DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
 
-  foreach ftz = ["", "_ftz"] in
-    def int_nvvm_ex2_approx # ftz # _f : NVVMBuiltin,
-        DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-
-  def int_n...
[truncated]

Artem-B

Nice to see those tex/surf intrinsics combined into something less verbose.
Setting intrinsic attributes via encompassing let is a mixed blessing.
It's fine for smaller blocks, where one can see them.

They are a bit of a regression in terms of readability for larger blocks, as it makes it too easy to look at an intrinsic, and completely miss the attributes that are set somewhere else, out of view. Considering that those properties are critical for the intrinsics, it's not ideal.

Not sure whether it's something we need to fix -- large-block let already exist in other places, so we can certainly live with it, but it would be nice if we could keep relevant info easily visible where it matters. Perhaps we could apply let over parts of particularly large blocks. It will be redundant, but may work well enough for keeping things readable. WDYT?

AlexMaclean · 2025-05-14T19:59:13Z

Nice to see those tex/surf intrinsics combined into something less verbose. Setting intrinsic attributes via encompassing let is a mixed blessing. It's fine for smaller blocks, where one can see them.

They are a bit of a regression in terms of readability for larger blocks, as it makes it too easy to look at an intrinsic, and completely miss the attributes that are set somewhere else, out of view. Considering that those properties are critical for the intrinsics, it's not ideal.

Not sure whether it's something we need to fix -- large-block let already exist in other places, so we can certainly live with it, but it would be nice if we could keep relevant info easily visible where it matters. Perhaps we could apply let over parts of particularly large blocks. It will be redundant, but may work well enough for keeping things readable. WDYT?

Looking through the file, the only place I see where there seems like a problem is the type-conversion intrinsics. In all the other cases, it looks like the let fits pretty comfortably onto my screen. Since it seems like this is just a problem in this one place, I think it is probably fine to live with the large let. Alternately if you prefer, I could define a PureIntrinsic class which is a DefaultAttrsIntrinsic with IntrSpeculatable and IntrNoMem. I could then use this for all type-conversion intrinsics instead of a let. Would that work?

Artem-B · 2025-05-14T21:06:03Z

I agree, only conversion intrinsics are affected, no need to special case them. Everything else looks fine when we look at the file with the change applied..

the only place I see where there seems like a problem is the type-conversion intrinsics.

That's what prompted my comment. Looking at it as a diff also made things look worse, as the new changes were spread around by removed chunks on the left side of the diff. E.g. fma:

llvm/include/llvm/IR/IntrinsicsNVVM.td

…NFC) (llvm#139924) Note: the diff indicates this change has no impact on the intrinsic code generated by table-gen.

AlexMaclean added 4 commits May 14, 2025 15:17

scratch

f141062

more cleanup

8bb3e2f

more cleanup

f0875ec

scratch

c23800d

AlexMaclean requested review from Artem-B and justinfargnoli May 14, 2025 15:36

AlexMaclean self-assigned this May 14, 2025

llvmbot added backend:NVPTX llvm:ir labels May 14, 2025

AlexMaclean requested a review from durga4github May 14, 2025 15:54

Artem-B approved these changes May 14, 2025

View reviewed changes

fixup

b556a6e

AlexMaclean merged commit 847561e into llvm:main May 14, 2025
7 checks passed

durga4github reviewed May 15, 2025

View reviewed changes

llvm/include/llvm/IR/IntrinsicsNVVM.td Show resolved Hide resolved

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[NVPTX] Further refactor intrinsic definitions to remove redundancy (NFC) #139924

[NVPTX] Further refactor intrinsic definitions to remove redundancy (NFC) #139924

Uh oh!

AlexMaclean commented May 14, 2025

Uh oh!

llvmbot commented May 14, 2025 •

edited

Loading

Uh oh!

Artem-B left a comment

Uh oh!

AlexMaclean commented May 14, 2025

Uh oh!

Artem-B commented May 14, 2025

Uh oh!

Uh oh!

Uh oh!

Uh oh!

[NVPTX] Further refactor intrinsic definitions to remove redundancy (NFC) #139924

[NVPTX] Further refactor intrinsic definitions to remove redundancy (NFC) #139924

Uh oh!

Conversation

AlexMaclean commented May 14, 2025

Uh oh!

llvmbot commented May 14, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Artem-B left a comment

Choose a reason for hiding this comment

Uh oh!

AlexMaclean commented May 14, 2025

Uh oh!

Artem-B commented May 14, 2025

Uh oh!

Uh oh!

Uh oh!

Uh oh!

llvmbot commented May 14, 2025 •

edited

Loading