
[AMDGPU] Always lower s/udiv64 by constant to MUL #100723

Merged: 5 commits into llvm:main from amdgpu-div-by-k on Aug 2, 2024

Conversation

Pierre-vh (Contributor):

Solves #100383
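
The transform in question is the standard strength reduction of an integer division by a constant into a multiply by a precomputed magic value followed by a shift, done here with a wide (128-bit) MUL for the 64-bit case. A minimal standalone sketch of the unsigned variant (illustration only: magicFor, udivByConst and the hard-coded shift are invented for this example; the real BuildUDIV derives the shift and handles the fixup cases through the generic magic-number algorithm):

// Illustration of division-by-constant via a wide multiply, not LLVM code.
#include <cassert>
#include <cstdint>

using u128 = unsigned __int128; // GCC/Clang extension, stands in for the wide MUL

// M = ceil(2^(64+s) / d); s must be chosen large enough for exactness
// (the real algorithm derives s and handles the corner cases).
static u128 magicFor(uint64_t d, unsigned s) {
  u128 num = (u128)1 << (64 + s);
  return (num + d - 1) / d;
}

static uint64_t udivByConst(uint64_t x, u128 m, unsigned s) {
  return (uint64_t)(((u128)x * m) >> (64 + s)); // wide MUL + shift, no division
}

int main() {
  // Divide by 6: shift s = 2 is exact for every 64-bit x.
  const u128 m6 = magicFor(6, 2);
  const uint64_t samples[] = {0, 1, 5, 6, 7, 123456789, 0xFFFFFFFFull,
                              0x8000000000000000ull, ~0ull};
  for (uint64_t x : samples)
    assert(udivByConst(x, m6, 2) == x / 6);
  return 0;
}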

@Pierre-vh requested review from jayfoad and arsenm on July 26, 2024 09:34
@llvmbot added the backend:AMDGPU and llvm:SelectionDAG labels on Jul 26, 2024

; Sample test to check how we deal with division/modulos by 64 bit constants.

define noundef i64 @srem64_3(i64 noundef %i) {
Pierre-vh (Contributor, Author):

If you only check the second commit, you can see the ISA diff.

@llvmbot (Member) commented on Jul 26, 2024

@llvm/pr-subscribers-llvm-selectiondag

@llvm/pr-subscribers-backend-amdgpu

Author: Pierre van Houtryve (Pierre-vh)

Changes

Solves #100383


Patch is 129.16 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100723.diff

9 Files Affected:

  • (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+8)
  • (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+4-2)
  • (modified) llvm/lib/Target/AMDGPU/SIISelLowering.h (+5)
  • (modified) llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll (+338-1036)
  • (added) llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll (+618)
  • (modified) llvm/test/CodeGen/AMDGPU/fshr.ll (+23-30)
  • (modified) llvm/test/CodeGen/AMDGPU/udiv.ll (+65-263)
  • (modified) llvm/test/CodeGen/AMDGPU/udiv64.ll (+42-140)
  • (modified) llvm/test/CodeGen/AMDGPU/urem64.ll (+31-71)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 9d9886f4920a2..fd45fd9566c95 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5109,6 +5109,14 @@ class TargetLowering : public TargetLoweringBase {
     return 0;
   }
 
+  /// Indicate whether this target prefers to force the lowering of a div by
+  /// constant to a MUL of a magic value, even if the MUL operation is not legal
+  /// or custom.
+  virtual bool forceDivByConstantToMul(EVT DivVT, EVT MuLVT,
+                                       bool IsAfterLegalization) const {
+    return false;
+  }
+
   /// Hooks for building estimates in place of slower divisions and square
   /// roots.
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 140c97ccd90ba..315cf2156bd8c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6405,7 +6405,8 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
     if (VT.isVector())
       WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                                 VT.getVectorElementCount());
-    if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
+    if (isOperationLegalOrCustom(ISD::MUL, WideVT) ||
+        forceDivByConstantToMul(VT, WideVT, IsAfterLegalization)) {
       X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
       Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
       Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
@@ -6588,7 +6589,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
     if (VT.isVector())
       WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                                 VT.getVectorElementCount());
-    if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
+    if (isOperationLegalOrCustom(ISD::MUL, WideVT) ||
+        forceDivByConstantToMul(VT, WideVT, IsAfterLegalization)) {
       X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
       Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
       Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 1f198a92c0fa6..e4cdd38c69aa1 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -372,6 +372,11 @@ class SITargetLowering final : public AMDGPUTargetLowering {
     return 2;
   }
 
+  bool forceDivByConstantToMul(EVT DivVT, EVT MuLVT,
+                               bool IsAfterLegalization) const override {
+    return !IsAfterLegalization;
+  }
+
   bool supportSplitCSR(MachineFunction *MF) const override;
   void initializeSplitCSR(MachineBasicBlock *Entry) const override;
   void insertCopiesSplitCSR(
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
index 96e92bb3dce0d..e4756ad3817c2 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
@@ -7066,202 +7066,57 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ;
 ; GFX6-LABEL: udiv_i64_oddk_denom:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_add_u32 s4, 3, 0
-; GFX6-NEXT:    v_mov_b32_e32 v0, 0xe3e0f6
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, s4, v0
-; GFX6-NEXT:    s_addc_u32 s5, 0, 0
-; GFX6-NEXT:    s_or_b32 s4, vcc_lo, vcc_hi
-; GFX6-NEXT:    s_cmp_lg_u32 s4, 0
-; GFX6-NEXT:    s_mov_b32 s4, 0x68958c89
-; GFX6-NEXT:    s_movk_i32 s6, 0xfee0
-; GFX6-NEXT:    v_mul_lo_u32 v1, v0, s6
-; GFX6-NEXT:    v_mul_hi_u32 v2, v0, s4
-; GFX6-NEXT:    s_addc_u32 s5, s5, 0
-; GFX6-NEXT:    s_mul_i32 s6, s5, 0x68958c89
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x9
-; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
-; GFX6-NEXT:    v_mul_lo_u32 v2, v0, s4
-; GFX6-NEXT:    v_add_i32_e32 v1, vcc, s6, v1
-; GFX6-NEXT:    v_mul_lo_u32 v3, v0, v1
-; GFX6-NEXT:    v_mul_hi_u32 v4, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v5, v0, v1
-; GFX6-NEXT:    v_mul_hi_u32 v6, s5, v1
-; GFX6-NEXT:    v_mul_lo_u32 v1, s5, v1
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
-; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v5, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v5, s5, v2
-; GFX6-NEXT:    v_mul_hi_u32 v2, s5, v2
-; GFX6-NEXT:    s_movk_i32 s8, 0x11f
-; GFX6-NEXT:    s_mov_b32 s9, 0x976a7377
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, v4, v2, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v6, vcc
-; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
-; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v3, vcc
-; GFX6-NEXT:    v_mov_b32_e32 v3, s5
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v3, v2, vcc
+; GFX6-NEXT:    v_mov_b32_e32 v1, 0x64c139ef
+; GFX6-NEXT:    v_mov_b32_e32 v0, 0x38f83e5
+; GFX6-NEXT:    s_mov_b32 s7, 0xf000
+; GFX6-NEXT:    s_mov_b32 s6, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mul_lo_u32 v2, s2, v1
-; GFX6-NEXT:    v_mul_hi_u32 v3, s2, v0
 ; GFX6-NEXT:    v_mul_hi_u32 v4, s2, v1
-; GFX6-NEXT:    v_mul_hi_u32 v5, s3, v1
-; GFX6-NEXT:    v_mul_lo_u32 v1, s3, v1
+; GFX6-NEXT:    v_mul_hi_u32 v3, s3, v1
+; GFX6-NEXT:    s_mov_b32 s5, s1
+; GFX6-NEXT:    v_mul_hi_u32 v2, s2, v0
+; GFX6-NEXT:    s_mul_i32 s1, s3, 0x64c139ef
+; GFX6-NEXT:    v_add_i32_e32 v4, vcc, s1, v4
+; GFX6-NEXT:    s_mov_b32 s4, s0
+; GFX6-NEXT:    s_mul_i32 s0, s2, 0x38f83e5
+; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX6-NEXT:    v_add_i32_e32 v4, vcc, s0, v4
+; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v4, s3, v0
 ; GFX6-NEXT:    v_mul_hi_u32 v0, s3, v0
-; GFX6-NEXT:    s_mov_b32 s4, s0
-; GFX6-NEXT:    s_mov_b32 s5, s1
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; GFX6-NEXT:    v_addc_u32_e32 v0, vcc, v3, v0, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, 0, v2, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v2, v0, s8
-; GFX6-NEXT:    v_mul_hi_u32 v3, v0, s9
-; GFX6-NEXT:    v_mul_lo_u32 v4, v1, s9
-; GFX6-NEXT:    v_mov_b32_e32 v5, 0x11f
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT:    v_mul_lo_u32 v3, v0, s9
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s3, v2
-; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, s2, v3
-; GFX6-NEXT:    v_subb_u32_e64 v4, s[0:1], v4, v5, vcc
-; GFX6-NEXT:    v_subrev_i32_e64 v5, s[0:1], s9, v3
-; GFX6-NEXT:    v_subbrev_u32_e64 v4, s[0:1], 0, v4, s[0:1]
-; GFX6-NEXT:    s_movk_i32 s2, 0x11e
-; GFX6-NEXT:    v_cmp_lt_u32_e64 s[0:1], s2, v4
-; GFX6-NEXT:    s_mov_b32 s9, 0x976a7376
-; GFX6-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[0:1]
-; GFX6-NEXT:    v_cmp_lt_u32_e64 s[0:1], s9, v5
-; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
-; GFX6-NEXT:    v_cmp_eq_u32_e64 s[0:1], s8, v4
-; GFX6-NEXT:    v_cndmask_b32_e64 v4, v6, v5, s[0:1]
-; GFX6-NEXT:    v_add_i32_e64 v5, s[0:1], 1, v0
-; GFX6-NEXT:    v_addc_u32_e64 v6, s[0:1], 0, v1, s[0:1]
-; GFX6-NEXT:    v_add_i32_e64 v7, s[0:1], 2, v0
-; GFX6-NEXT:    v_addc_u32_e64 v8, s[0:1], 0, v1, s[0:1]
-; GFX6-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v4
-; GFX6-NEXT:    v_cndmask_b32_e64 v4, v5, v7, s[0:1]
-; GFX6-NEXT:    v_cndmask_b32_e64 v5, v6, v8, s[0:1]
-; GFX6-NEXT:    v_mov_b32_e32 v6, s3
-; GFX6-NEXT:    v_subb_u32_e32 v2, vcc, v6, v2, vcc
-; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v2
-; GFX6-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
-; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s9, v3
-; GFX6-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
-; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, s8, v2
-; GFX6-NEXT:    v_cndmask_b32_e32 v2, v6, v3, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; GFX6-NEXT:    s_mov_b32 s6, -1
-; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX6-NEXT:    v_addc_u32_e64 v3, s[0:1], 0, 0, vcc
+; GFX6-NEXT:    s_mul_i32 s0, s3, 0x38f83e5
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, s0, v2
+; GFX6-NEXT:    v_addc_u32_e32 v0, vcc, v0, v3, vcc
+; GFX6-NEXT:    v_mov_b32_e32 v1, 0
+; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 2, v0
 ; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: udiv_i64_oddk_denom:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_add_u32 s0, 3, 0
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0xe3e0f6
-; GFX9-NEXT:    s_addc_u32 s1, 0, 0
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s0, v0
 ; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-NEXT:    s_cmp_lg_u64 vcc, 0
-; GFX9-NEXT:    v_readfirstlane_b32 s2, v0
-; GFX9-NEXT:    s_addc_u32 s0, s1, 0
-; GFX9-NEXT:    s_mul_i32 s3, s2, 0xfffffee0
-; GFX9-NEXT:    s_mul_hi_u32 s8, s2, 0x68958c89
-; GFX9-NEXT:    s_mul_i32 s1, s0, 0x68958c89
-; GFX9-NEXT:    s_add_i32 s3, s8, s3
-; GFX9-NEXT:    s_add_i32 s3, s3, s1
-; GFX9-NEXT:    s_mul_i32 s9, s2, 0x68958c89
-; GFX9-NEXT:    s_mul_hi_u32 s1, s2, s3
-; GFX9-NEXT:    s_mul_i32 s8, s2, s3
-; GFX9-NEXT:    s_mul_hi_u32 s2, s2, s9
-; GFX9-NEXT:    s_add_u32 s2, s2, s8
-; GFX9-NEXT:    s_addc_u32 s1, 0, s1
-; GFX9-NEXT:    s_mul_hi_u32 s10, s0, s9
-; GFX9-NEXT:    s_mul_i32 s9, s0, s9
-; GFX9-NEXT:    s_add_u32 s2, s2, s9
-; GFX9-NEXT:    s_mul_hi_u32 s8, s0, s3
-; GFX9-NEXT:    s_addc_u32 s1, s1, s10
-; GFX9-NEXT:    s_addc_u32 s2, s8, 0
-; GFX9-NEXT:    s_mul_i32 s3, s0, s3
-; GFX9-NEXT:    s_add_u32 s1, s1, s3
-; GFX9-NEXT:    s_addc_u32 s2, 0, s2
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s1, v0
-; GFX9-NEXT:    s_cmp_lg_u64 vcc, 0
-; GFX9-NEXT:    s_addc_u32 s0, s0, s2
-; GFX9-NEXT:    v_readfirstlane_b32 s3, v0
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_mul_i32 s2, s6, s0
-; GFX9-NEXT:    s_mul_hi_u32 s8, s6, s3
-; GFX9-NEXT:    s_mul_hi_u32 s1, s6, s0
-; GFX9-NEXT:    s_add_u32 s2, s8, s2
-; GFX9-NEXT:    s_addc_u32 s1, 0, s1
-; GFX9-NEXT:    s_mul_hi_u32 s9, s7, s3
-; GFX9-NEXT:    s_mul_i32 s3, s7, s3
-; GFX9-NEXT:    s_add_u32 s2, s2, s3
-; GFX9-NEXT:    s_mul_hi_u32 s8, s7, s0
-; GFX9-NEXT:    s_addc_u32 s1, s1, s9
-; GFX9-NEXT:    s_addc_u32 s2, s8, 0
-; GFX9-NEXT:    s_mul_i32 s0, s7, s0
-; GFX9-NEXT:    s_add_u32 s3, s1, s0
-; GFX9-NEXT:    s_addc_u32 s2, 0, s2
-; GFX9-NEXT:    s_mul_i32 s0, s3, 0x11f
-; GFX9-NEXT:    s_mul_hi_u32 s8, s3, 0x976a7377
-; GFX9-NEXT:    s_add_i32 s0, s8, s0
-; GFX9-NEXT:    s_mul_i32 s8, s2, 0x976a7377
-; GFX9-NEXT:    s_mul_i32 s9, s3, 0x976a7377
-; GFX9-NEXT:    s_add_i32 s8, s0, s8
-; GFX9-NEXT:    v_mov_b32_e32 v0, s9
-; GFX9-NEXT:    s_sub_i32 s0, s7, s8
-; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, s6, v0
-; GFX9-NEXT:    s_mov_b32 s1, 0x976a7377
-; GFX9-NEXT:    s_cmp_lg_u64 vcc, 0
-; GFX9-NEXT:    s_subb_u32 s6, s0, 0x11f
-; GFX9-NEXT:    v_subrev_co_u32_e64 v1, s[0:1], s1, v0
-; GFX9-NEXT:    s_cmp_lg_u64 s[0:1], 0
-; GFX9-NEXT:    s_subb_u32 s6, s6, 0
-; GFX9-NEXT:    s_cmpk_gt_u32 s6, 0x11e
-; GFX9-NEXT:    s_mov_b32 s10, 0x976a7376
-; GFX9-NEXT:    s_cselect_b32 s9, -1, 0
-; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s10, v1
-; GFX9-NEXT:    s_cmpk_eq_i32 s6, 0x11f
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[0:1]
-; GFX9-NEXT:    v_mov_b32_e32 v3, s9
-; GFX9-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[0:1]
-; GFX9-NEXT:    s_add_u32 s0, s3, 1
-; GFX9-NEXT:    s_addc_u32 s6, s2, 0
-; GFX9-NEXT:    s_add_u32 s1, s3, 2
-; GFX9-NEXT:    s_addc_u32 s9, s2, 0
-; GFX9-NEXT:    v_mov_b32_e32 v3, s0
-; GFX9-NEXT:    v_mov_b32_e32 v4, s1
-; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v1
-; GFX9-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[0:1]
-; GFX9-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9-NEXT:    v_mov_b32_e32 v4, s9
-; GFX9-NEXT:    s_cmp_lg_u64 vcc, 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[0:1]
-; GFX9-NEXT:    s_subb_u32 s0, s7, s8
-; GFX9-NEXT:    s_cmpk_gt_u32 s0, 0x11e
-; GFX9-NEXT:    s_cselect_b32 s1, -1, 0
-; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s10, v0
-; GFX9-NEXT:    s_cmpk_eq_i32 s0, 0x11f
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
-; GFX9-NEXT:    v_mov_b32_e32 v4, s1
-; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT:    v_mov_b32_e32 v4, s2
-; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX9-NEXT:    v_mov_b32_e32 v0, s3
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[4:5]
+; GFX9-NEXT:    s_mul_hi_u32 s0, s6, 0x38f83e5
+; GFX9-NEXT:    s_mul_i32 s1, s6, 0x38f83e5
+; GFX9-NEXT:    s_mul_i32 s3, s7, 0x64c139ef
+; GFX9-NEXT:    s_mul_hi_u32 s6, s6, 0x64c139ef
+; GFX9-NEXT:    s_mul_hi_u32 s2, s7, 0x64c139ef
+; GFX9-NEXT:    s_add_u32 s3, s3, s6
+; GFX9-NEXT:    s_addc_u32 s2, s2, 0
+; GFX9-NEXT:    s_add_u32 s1, s1, s3
+; GFX9-NEXT:    s_addc_u32 s0, s0, 0
+; GFX9-NEXT:    s_add_u32 s0, s2, s0
+; GFX9-NEXT:    s_addc_u32 s1, 0, 0
+; GFX9-NEXT:    s_mul_i32 s3, s7, 0x38f83e5
+; GFX9-NEXT:    s_mul_hi_u32 s2, s7, 0x38f83e5
+; GFX9-NEXT:    s_add_u32 s0, s3, s0
+; GFX9-NEXT:    s_addc_u32 s0, s2, s1
+; GFX9-NEXT:    s_lshr_b32 s0, s0, 2
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    global_store_dwordx2 v1, v[0:1], s[4:5]
 ; GFX9-NEXT:    s_endpgm
   %r = udiv i64 %x, 1235195949943
   store i64 %r, ptr addrspace(1) %out
@@ -7405,84 +7260,34 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out, <
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0xd
 ; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
-; GFX6-NEXT:    s_mov_b32 s2, 0x2ff2fc01
-; GFX6-NEXT:    v_bfrev_b32_e32 v0, 7
-; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_lshr_b64 s[4:5], s[4:5], 12
-; GFX6-NEXT:    s_add_u32 s2, 0xe037f, s2
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, s2, v0
-; GFX6-NEXT:    s_addc_u32 s3, 0, 0
-; GFX6-NEXT:    s_or_b32 s2, vcc_lo, vcc_hi
-; GFX6-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX6-NEXT:    s_movk_i32 s2, 0xf001
-; GFX6-NEXT:    v_mul_hi_u32 v1, v0, s2
-; GFX6-NEXT:    v_mul_lo_u32 v2, v0, s2
-; GFX6-NEXT:    s_addc_u32 s8, s3, 0x1000ff
-; GFX6-NEXT:    s_mul_i32 s3, s8, 0xfffff001
-; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v0
-; GFX6-NEXT:    v_add_i32_e32 v1, vcc, s3, v1
-; GFX6-NEXT:    v_mul_lo_u32 v3, v0, v1
-; GFX6-NEXT:    v_mul_hi_u32 v4, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v5, v0, v1
-; GFX6-NEXT:    v_mul_hi_u32 v6, s8, v1
-; GFX6-NEXT:    v_mul_lo_u32 v1, s8, v1
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
-; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v5, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v5, s8, v2
-; GFX6-NEXT:    v_mul_hi_u32 v2, s8, v2
+; GFX6-NEXT:    v_mov_b32_e32 v2, 0x10010011
+; GFX6-NEXT:    v_mov_b32_e32 v0, 0x100100
 ; GFX6-NEXT:    s_mov_b32 s3, 0xf000
-; GFX6-NEXT:    s_mov_b32 s2, -1
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, v4, v2, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v6, vcc
+; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX6-NEXT:    v_mul_hi_u32 v3, s6, v2
+; GFX6-NEXT:    v_mul_hi_u32 v2, s7, v2
+; GFX6-NEXT:    v_mul_hi_u32 v1, s6, v0
+; GFX6-NEXT:    s_mul_i32 s9, s7, 0x10010011
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, s9, v3
+; GFX6-NEXT:    s_mul_i32 s8, s6, 0x100100
+; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, s8, v3
+; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
-; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v3, vcc
-; GFX6-NEXT:    v_mov_b32_e32 v3, s8
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v3, v2, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v2, s6, v1
-; GFX6-NEXT:    v_mul_hi_u32 v3, s6, v0
-; GFX6-NEXT:    v_mul_hi_u32 v4, s6, v1
-; GFX6-NEXT:    v_mul_hi_u32 v5, s7, v1
-; GFX6-NEXT:    v_mul_lo_u32 v1, s7, v1
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v4, s7, v0
 ; GFX6-NEXT:    v_mul_hi_u32 v0, s7, v0
-; GFX6-NEXT:    s_movk_i32 s8, 0xfff
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; GFX6-NEXT:    v_addc_u32_e32 v0, vcc, v3, v0, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, 0, v2, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v4, v1, s8
-; GFX6-NEXT:    v_mul_hi_u32 v5, v0, s8
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
-; GFX6-NEXT:    v_mul_lo_u32 v8, v0, s8
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v1, vcc
-; GFX6-NEXT:    v_add_i32_e32 v6, vcc, 2, v0
-; GFX6-NEXT:    v_addc_u32_e32 v7, vcc, 0, v1, vcc
-; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GFX6-NEXT:    v_mov_b32_e32 v5, s7
-; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, s6, v8
-; GFX6-NEXT:    v_subb_u32_e32 v4, vcc, v5, v4, vcc
-; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, s8, v8
-; GFX6-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v4, vcc
-; GFX6-NEXT:    s_movk_i32 s6, 0xffe
-; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v5
-; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
-; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
-; GFX6-NEXT:    v_cndmask_b32_e32 v5, -1, v5, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v8
-; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
-; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
-; GFX6-NEXT:    v_cndmask_b32_e32 v4, -1, v5, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
-; GFX6-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
-; GFX6-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
+; GFX6-NEXT:    v_addc_u32_e64 v2, s[8:9], 0, 0, vcc
+; GFX6-NEXT:    s_mul_i32 s8, s7, 0x100100
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, s8, v1
+; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, v0, v2, vcc
+; GFX6-NEXT:    v_mov_b32_e32 v1, s7
+; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s6, v3
+; GFX6-NEXT:    v_subb_u32_e32 v1, vcc, v1, v2, vcc
+; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], 1
+; GFX6-NEXT:    s_lshr_b64 s[4:5], s[4:5], 12
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
+; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v2, vcc
+; GFX6-NEXT:    v_lshr_b64 v[2:3], v[0:1], 11
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
@@ -7492,95 +7297,34 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out, <
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x34
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
-; GFX9-NEXT:    s_mov_b32 s8, 0x2ff2fc01
-; GFX9-NEXT:    v_bfrev_b32_e32 v0, 7
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_lshr_b64 s[2:3], s[4:5], 12
-; GFX9-NEXT:    s_add_u32 s4, 0xe037f, s8
-; GFX9-NEXT:    s_addc_u32 s5, 0, 0
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v0
-; GFX9-NEXT:    s_cmp_lg_u64 vcc, 0
-; GFX9-NEXT:    v_readfirstlane_b32 s8, v0
-; GFX9-NEXT:    s_addc_u32 s4, s5, 0x1000ff
-; GFX9-NEXT:    s_mul_hi_u32 s9, s8, 0xfffff001
-; GFX9-NEXT:    s_mul_i32 s5, s4, 0xfffff001
-; GFX9-NEXT:    s_sub_i32 s9, s9, s8
-; GFX9-NEXT:    s_add_i32 s9, s9, s5
-; GFX9-NEXT:    s_mul_i32 s11, s8, 0xfffff001
-; GFX9-NEXT:    s_mul_hi_u32 s5, s8, s9
-; GFX9-NEXT:    s_mul_i32 s10, s8, s9
-; GFX9-NEXT:    s_mul_hi_u32 s8, s8, s11
-; GFX9-NEXT:    s_add_u32 s8, s8, s10
-; GFX9-NEXT:    s_addc_u32 s5, 0, s5
-; GFX9-NEXT:    s_mul_hi_u32 s12, s4, s11
-; GFX9-NEXT:    s_mul_i32 s11, s4, s11
-; GFX9-NEXT:    s_add_u32 s8, s8, s11
-; GFX9-NEXT:    s_mul_hi_u32 s10, s4, s9
-; GFX9-NEXT:    s_addc_u32 s5, s5, s12
-; GFX9-NEXT:    s_addc_u32 s8, s10, 0
-; GFX9-NEXT:    s_mul_i32 s9, s4, s9
+; GFX9-NEXT:    s_mul_i32 s9, s7, 0x10010011
+; GFX9-NEXT:    s_mul_hi_u32 s10, s6, 0x10010011
+; GFX9-NEXT:    s_mul_hi_u32 s8, s7, 0x10010011
+; GFX9-NEXT:    s_add_u32 s9, s9, s10
+; GFX9-NEXT:    s_mul_i32 s5, s6, 0x100100
+; GFX9-NEXT:    s_addc_u32 s8, s8, 0
+; GFX9-NEXT:    s_mul_hi_u32 s4, s6, 0x100100
 ; GFX9-NEXT:    s_add_u32 s5, s5, s9
-; GFX9-NEXT:    s_addc_u32 s8, 0, s8
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s5, v0
-; GFX9-NEXT:...
[truncated]
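
For reference, the shorter GFX9 sequence above is in effect the high half of a 64x64-to-128-bit unsigned multiply built out of 32-bit partial products (the s_mul_i32 / s_mul_hi_u32 pairs plus carry adds), followed by a right shift by 2. A small standalone sketch of that high-half computation (a schoolbook illustration, not the exact instruction schedule the backend emits):

#include <cassert>
#include <cstdint>

// High 64 bits of a 64x64 unsigned multiply, built from four 32-bit
// partial products and two carry propagations.
static uint64_t mulhu64(uint64_t a, uint64_t b) {
  const uint64_t aLo = (uint32_t)a, aHi = a >> 32;
  const uint64_t bLo = (uint32_t)b, bHi = b >> 32;

  const uint64_t loLo = aLo * bLo; // contributes to bits 0..63
  const uint64_t loHi = aLo * bHi; // contributes to bits 32..95
  const uint64_t hiLo = aHi * bLo; // contributes to bits 32..95
  const uint64_t hiHi = aHi * bHi; // contributes to bits 64..127

  // Sum the middle column, then fold its carry into the high half.
  const uint64_t mid = (loLo >> 32) + (uint32_t)loHi + (uint32_t)hiLo;
  return hiHi + (loHi >> 32) + (hiLo >> 32) + (mid >> 32);
}

int main() {
  // Cross-check against a native 128-bit multiply (GCC/Clang extension).
  // 0x038f83e564c139ef is just the two halves 0x38f83e5 / 0x64c139ef from
  // the udiv_i64_oddk_denom output above, used here as a sample input.
  const uint64_t samples[] = {0, 1, 6, 0x038f83e564c139efull,
                              0x123456789abcdef0ull, ~0ull};
  for (uint64_t a : samples)
    for (uint64_t b : samples)
      assert(mulhu64(a, b) == (uint64_t)(((unsigned __int128)a * b) >> 64));
  return 0;
}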

@@ -6588,7 +6589,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
     if (VT.isVector())
       WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                                 VT.getVectorElementCount());
-    if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
+    if (isOperationLegalOrCustom(ISD::MUL, WideVT) ||
Pierre-vh (Contributor, Author):

I tried expanding to MULH/MUL_LOHI, but the codegen is a bit worse: there are a few more instructions. MUL seems best.

@@ -6405,7 +6405,8 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
     if (VT.isVector())
       WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                                 VT.getVectorElementCount());
-    if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
+    if (isOperationLegalOrCustom(ISD::MUL, WideVT) ||
Reviewer (Contributor):

I think it would make more sense to just delete the check.

Pierre-vh (Contributor, Author):

I tried enabling it for all targets, but it doesn't seem to always be profitable. At a minimum we need an isTypeLegal check, otherwise some targets even crash (riscv32).

I'll have another look.

Pierre-vh (Contributor, Author) on Jul 26, 2024:

Removing the check causes about 30 failures. Using only isTypeLegal on the wide type doesn't cause more than 1-2 test changes, but then it doesn't work on AMDGPU because i128 isn't legal there.

What I could do, I think, is refactor this so the isOperationLegalOrCustom check moves into the TLI hook?

Reviewer (Contributor):

I primarily want to not have any new TLI hook. The quantity of test changes doesn't mean it's worse; just update them?

Reviewer (Contributor):

I assume this should be something like !LegalOperations || isOperationLegalOrCustom(MUL)

Pierre-vh (Contributor, Author):

Sure, I can update the tests, but we still need some kind of legality check, otherwise we get crashes in a few places. RISCV seems especially sensitive to that, and there are a few crashes I can't seem to get rid of without losing the AMDGPU improvements.

Why is the TLI hook not desirable? We already have some for other expansions.
I could make an enum instead to decide which expansion method to use; targets would default to a legality-based method while AMDGPU forces MUL expansion.
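
A rough sketch of what such an enum-based hook could look like (purely hypothetical: the names below are invented for illustration and do not correspond to anything in LLVM):

#include <cstdio>

// Hypothetical illustration only: neither the enum nor the hook exists.
enum class DivByConstStrategy {
  WideMulIfLegal, // default: only use the wide MUL path if it is legal/custom
  ForceWideMul,   // always build the wide multiply (what AMDGPU wants)
};

struct MockTargetLowering {
  virtual ~MockTargetLowering() = default;
  virtual DivByConstStrategy getDivByConstStrategy() const {
    return DivByConstStrategy::WideMulIfLegal;
  }
};

struct MockSITargetLowering final : MockTargetLowering {
  DivByConstStrategy getDivByConstStrategy() const override {
    return DivByConstStrategy::ForceWideMul;
  }
};

int main() {
  MockSITargetLowering AMDGPU;
  const MockTargetLowering &TLI = AMDGPU;
  std::printf("force wide mul: %d\n",
              TLI.getDivByConstStrategy() == DivByConstStrategy::ForceWideMul);
  return 0;
}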

Reviewer (Contributor):

> Why is the TLI hook not desirable?

Because we already have way too many garbage single-use, undiscoverable hacks for specific combines like this one.

> We already have some for other expansions.

That's the problem. They're all hacky shortcuts to avoid touching all the targets or coming up with a reasonable heuristic in terms of the existing APIs.

Pierre-vh (Contributor, Author):

I see, that makes sense.
I think I'll need to dive a bit into the RISCV backend to see why it's so prone to crashing on this.

The heuristic also needs a bit more thought. I feel like we shouldn't blindly apply this combine, but it also shouldn't rely too much on legality, otherwise it won't kick in for AMDGPU.

@Pierre-vh requested a review from arsenm on July 29, 2024 10:17
Comment on lines +6411 to +6412
if ((isOperationExpand(ISD::SDIV, VT) &&
isOperationCustom(ISD::SDIVREM, VT.getScalarType())) ||
Reviewer (Contributor):

This logic probably isn't exactly right; it shouldn't be looking at the scalar type for no reason. However, the DAG legalization rules don't really let you figure out whether a vector operation is going to be scalarized or not.

Pierre-vh (Contributor, Author):

I don't especially like it either; it feels very targeted. But I couldn't find any alternative that didn't cause crashes or regressions in other backends.

Checking the scalar type is indeed because the flow I observed is (vector SDIV) -> (scalar SDIV) -> (scalar SDIVREM).

entry:
%div = udiv i64 %i, 6
ret i64 %div
}
Reviewer (Contributor):

Does this need power of 2 tests? What about values around int32_max?

Pierre-vh (Contributor, Author):

Added tests for s/u div/rem for 2, 64, int32 min, int32 max.
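
For the power-of-two divisors those tests cover, the expected lowering is typically not a magic multiply at all: an unsigned divide by 2^k is just a logical shift, and a signed divide by 2^k only needs a small bias before an arithmetic shift so the result truncates toward zero. A quick standalone check of those identities (illustration only, independent of the patch):

#include <cassert>
#include <cstdint>

// Unsigned divide by 2^k: a logical shift right.
static uint64_t udivPow2(uint64_t x, unsigned k) { return x >> k; }

// Signed divide by 2^k: bias negative values by (2^k - 1) so the arithmetic
// shift rounds toward zero, matching C++'s truncating division.
// Assumes arithmetic right shift of negative values (true on GCC/Clang/MSVC).
static int64_t sdivPow2(int64_t x, unsigned k) {
  const int64_t bias = (x >> 63) & ((INT64_C(1) << k) - 1);
  return (x + bias) >> k;
}

int main() {
  const uint64_t usamples[] = {0, 1, 63, 64, 65, ~0ull};
  for (uint64_t u : usamples)
    assert(udivPow2(u, 6) == u / 64);

  const int64_t ssamples[] = {0, 1, -1, 7, -7, 63, -63, 64, -64,
                              INT64_MAX, INT64_MIN + 1};
  for (int64_t s : ssamples) {
    assert(sdivPow2(s, 1) == s / 2);
    assert(sdivPow2(s, 6) == s / 64);
  }
  return 0;
}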

@Pierre-vh requested a review from arsenm on July 30, 2024 09:55
@Pierre-vh merged commit 92fbc96 into llvm:main on Aug 2, 2024 (7 checks passed)
@Pierre-vh deleted the amdgpu-div-by-k branch on August 2, 2024 10:22
@MaskRay (Member) commented on Aug 2, 2024:

Heads up: this patch also affected ARM and appears to cause a SelectionDAG failure for a Halide use case:

LLVM ERROR: Cannot select: 0x7339ffda0770: v4i32 = truncate 0x7339ffda0700
  0x7339ffda0700: v4i64 = srl 0x7339ffd8f620, 0x7339ffd8fc40
    0x7339ffd8f620: v4i64 = mul 0x7339ffda03f0, 0x7339ffd8f540
      0x7339ffda03f0: v4i64 = zero_extend 0x7339ffd74690
        0x7339ffd74690: v4i32 = add 0x7339ffd74a80, 0x7339ffaf42a0
          0x7339ffd74a80: v4i32 = add 0x7339ffd747e0, 0x7339ffd745b0
            0x7339ffd747e0: v4i32 = add 0x7339ffd8fa80, 0x7339ffd74620
              0x7339ffd8fa80: v4i32 = ARMISD::VMULLu 0x7339ffda00e0, 0x7339ffda0000
                0x7339ffda00e0: v4i16 = extract_subvector 0x7339ffaf40e0, Constant:i32<0>
                  0x7339ffaf40e0: v8i16,ch = load<(load (s64) from %stack.2), zext from v8i8> 0x7339ffaf4070, FrameIndex:i32<2>, undef:i32
...

@steven-johnson commented:

Enclosed is a Halide-generated .ll that will assert-fail when run through llc with this PR in place. We should probably revert this PR for now.
bad.ll.zip

MaskRay added a commit that referenced this pull request Aug 2, 2024
This reverts commit 92fbc96.

The patch also affected ARM and caused an assertion failure during
CurDAG->Legalize
(#100723 (comment)).
@MaskRay (Member) commented on Aug 2, 2024:

Reverted.
At llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp:6411:

getOperationAction(ISD::SDIV, MVT::v4i32) == Expand
getOperationAction(ISD::SDIVREM, MVT::i32) == Custom

TLI.getTypeAction(*DAG.getContext(), MVT::v4i64) == TargetLoweringBase::TypeSplitVector
// llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp assertion failure

The LegalizeTypeAction of ISD::SRL for MVT::v4i64 is TypeSplitVector, triggering an assertion failure at llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:967:

`assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == TargetLowering::TypeLegal || ...

Cc @davemgreen in case there is a test coverage opportunity for llvm/test/CodeGen/ARM.

Pierre-vh added a commit to Pierre-vh/llvm-project that referenced this pull request Aug 12, 2024
Reland llvm#100723, fixing the ARM issue at the cost of a small regression in AMDGPU.

Solves llvm#100383
Pierre-vh added a commit that referenced this pull request Aug 12, 2024
Reland #100723, fixing the ARM issue at the cost of a small loss of optimization in `test/CodeGen/AMDGPU/fshr.ll`

Solves #100383
Labels: backend:AMDGPU, llvm:SelectionDAG