[AMDGPU] InstCombine llvm.amdgcn.ds.bpermute with uniform arguments #129895

jayfoad · 2025-03-05T16:05:48Z

No description provided.

llvmbot · 2025-03-05T16:06:28Z

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/129895.diff

2 Files Affected:

(modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (+21-3)
(modified) llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll (+64-23)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index ebe740f884ea6..8925f2e974efa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1118,9 +1118,11 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
   }
   case Intrinsic::amdgcn_permlane64:
   case Intrinsic::amdgcn_readfirstlane:
-  case Intrinsic::amdgcn_readlane: {
-    // If the first argument is uniform these intrinsics return it unchanged.
-    const Use &Src = II.getArgOperandUse(0);
+  case Intrinsic::amdgcn_readlane:
+  case Intrinsic::amdgcn_ds_bpermute: {
+    // If the data argument is uniform these intrinsics return it unchanged.
+    unsigned SrcIdx = IID == Intrinsic::amdgcn_ds_bpermute ? 1 : 0;
+    const Use &Src = II.getArgOperandUse(SrcIdx);
     if (isTriviallyUniform(Src))
       return IC.replaceInstUsesWith(II, Src.get());
 
@@ -1152,6 +1154,22 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
       }
     }
 
+    // If the lane argument of bpermute is uniform, change it to readlane. This
+    // generates better code and can enable further optimizations because
+    // readlane is AlwaysUniform.
+    if (IID == Intrinsic::amdgcn_ds_bpermute) {
+      const Use &Lane = II.getArgOperandUse(0);
+      if (isTriviallyUniform(Lane)) {
+        Value *NewLane = IC.Builder.CreateLShr(Lane, 2);
+        Function *NewDecl = Intrinsic::getOrInsertDeclaration(
+            II.getModule(), Intrinsic::amdgcn_readlane, II.getType());
+        II.setCalledFunction(NewDecl);
+        II.setOperand(0, Src);
+        II.setOperand(1, NewLane);
+        return &II;
+      }
+    }
+
     return std::nullopt;
   }
   case Intrinsic::amdgcn_writelane: {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 3605c4a929c58..843b436aa1b0f 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -66,7 +66,7 @@ define double @test_constant_fold_rcp_f64_43() nounwind {
 
 define float @test_constant_fold_rcp_f32_43_strictfp() nounwind strictfp {
 ; CHECK-LABEL: @test_constant_fold_rcp_f32_43_strictfp(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR14:[0-9]+]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR17:[0-9]+]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) strictfp nounwind readnone
@@ -115,7 +115,7 @@ define half @test_constant_fold_sqrt_f16_0() nounwind {
 
 define float @test_constant_fold_sqrt_f32_0() nounwind {
 ; CHECK-LABEL: @test_constant_fold_sqrt_f32_0(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR15:[0-9]+]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR18:[0-9]+]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.sqrt.f32(float 0.0) nounwind readnone
@@ -124,7 +124,7 @@ define float @test_constant_fold_sqrt_f32_0() nounwind {
 
 define double @test_constant_fold_sqrt_f64_0() nounwind {
 ; CHECK-LABEL: @test_constant_fold_sqrt_f64_0(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0.000000e+00) #[[ATTR15]]
+; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0.000000e+00) #[[ATTR18]]
 ; CHECK-NEXT:    ret double [[VAL]]
 ;
   %val = call double @llvm.amdgcn.sqrt.f64(double 0.0) nounwind readnone
@@ -141,7 +141,7 @@ define half @test_constant_fold_sqrt_f16_neg0() nounwind {
 
 define float @test_constant_fold_sqrt_f32_neg0() nounwind {
 ; CHECK-LABEL: @test_constant_fold_sqrt_f32_neg0(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float -0.000000e+00) #[[ATTR15]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float -0.000000e+00) #[[ATTR18]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.sqrt.f32(float -0.0) nounwind readnone
@@ -150,7 +150,7 @@ define float @test_constant_fold_sqrt_f32_neg0() nounwind {
 
 define double @test_constant_fold_sqrt_f64_neg0() nounwind {
 ; CHECK-LABEL: @test_constant_fold_sqrt_f64_neg0(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double -0.000000e+00) #[[ATTR15]]
+; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double -0.000000e+00) #[[ATTR18]]
 ; CHECK-NEXT:    ret double [[VAL]]
 ;
   %val = call double @llvm.amdgcn.sqrt.f64(double -0.0) nounwind readnone
@@ -718,7 +718,7 @@ define i1 @test_class_isnan_f32(float %x) nounwind {
 
 define i1 @test_class_isnan_f32_strict(float %x) nounwind strictfp {
 ; CHECK-LABEL: @test_class_isnan_f32_strict(
-; CHECK-NEXT:    [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 3) #[[ATTR16:[0-9]+]]
+; CHECK-NEXT:    [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 3) #[[ATTR19:[0-9]+]]
 ; CHECK-NEXT:    ret i1 [[VAL]]
 ;
   %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 3) strictfp
@@ -736,7 +736,7 @@ define i1 @test_class_is_p0_n0_f32(float %x) nounwind {
 
 define i1 @test_class_is_p0_n0_f32_strict(float %x) nounwind strictfp {
 ; CHECK-LABEL: @test_class_is_p0_n0_f32_strict(
-; CHECK-NEXT:    [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 96) #[[ATTR16]]
+; CHECK-NEXT:    [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 96) #[[ATTR19]]
 ; CHECK-NEXT:    ret i1 [[VAL]]
 ;
   %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 96) strictfp
@@ -2000,7 +2000,7 @@ define i64 @icmp_constant_inputs_false() {
 
 define i64 @icmp_constant_inputs_true() {
 ; CHECK-LABEL: @icmp_constant_inputs_true(
-; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR17:[0-9]+]]
+; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR20:[0-9]+]]
 ; CHECK-NEXT:    ret i64 [[RESULT]]
 ;
   %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34)
@@ -2707,7 +2707,7 @@ define i64 @fcmp_constant_inputs_false() {
 
 define i64 @fcmp_constant_inputs_true() {
 ; CHECK-LABEL: @fcmp_constant_inputs_true(
-; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR17]]
+; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR20]]
 ; CHECK-NEXT:    ret i64 [[RESULT]]
 ;
   %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4)
@@ -5845,7 +5845,7 @@ define double @trig_preop_constfold_neg32_segment() {
 
 define double @trig_preop_constfold_strictfp() strictfp {
 ; CHECK-LABEL: @trig_preop_constfold_strictfp(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) #[[ATTR16]]
+; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) #[[ATTR19]]
 ; CHECK-NEXT:    ret double [[VAL]]
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) strictfp
@@ -6214,7 +6214,7 @@ define half @test_constant_fold_log_f16_neg10() {
 
 define float @test_constant_fold_log_f32_qnan_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_log_f32_qnan_strictfp(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0x7FF8000000000000) #[[ATTR16]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0x7FF8000000000000) #[[ATTR19]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.log.f32(float 0x7FF8000000000000) strictfp
@@ -6223,7 +6223,7 @@ define float @test_constant_fold_log_f32_qnan_strictfp() strictfp {
 
 define float @test_constant_fold_log_f32_0_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_log_f32_0_strictfp(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0.000000e+00) #[[ATTR16]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0.000000e+00) #[[ATTR19]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.log.f32(float 0.0) strictfp
@@ -6232,7 +6232,7 @@ define float @test_constant_fold_log_f32_0_strictfp() strictfp {
 
 define float @test_constant_fold_log_f32_neg0_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_log_f32_neg0_strictfp(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -0.000000e+00) #[[ATTR16]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -0.000000e+00) #[[ATTR19]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.log.f32(float -0.0) strictfp
@@ -6241,7 +6241,7 @@ define float @test_constant_fold_log_f32_neg0_strictfp() strictfp {
 
 define float @test_constant_fold_log_f32_neg_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_log_f32_neg_strictfp(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -1.000000e+01) #[[ATTR16]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -1.000000e+01) #[[ATTR19]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.log.f32(float -10.0) strictfp
@@ -6258,7 +6258,7 @@ define float @test_constant_fold_log_f32_pinf_strictfp() strictfp {
 
 define float @test_constant_fold_log_f32_ninf_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_log_f32_ninf_strictfp(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0xFFF0000000000000) #[[ATTR16]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0xFFF0000000000000) #[[ATTR19]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.log.f32(float 0xFFF0000000000000) strictfp
@@ -6460,7 +6460,7 @@ define half @test_constant_fold_exp2_f16_neg10() {
 
 define float @test_constant_fold_exp2_f32_qnan_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_exp2_f32_qnan_strictfp(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0x7FF8000000000000) #[[ATTR16]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0x7FF8000000000000) #[[ATTR19]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.exp2.f32(float 0x7FF8000000000000) strictfp
@@ -6469,7 +6469,7 @@ define float @test_constant_fold_exp2_f32_qnan_strictfp() strictfp {
 
 define float @test_constant_fold_exp2_f32_0_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_exp2_f32_0_strictfp(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0.000000e+00) #[[ATTR16]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0.000000e+00) #[[ATTR19]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.exp2.f32(float 0.0) strictfp
@@ -6478,7 +6478,7 @@ define float @test_constant_fold_exp2_f32_0_strictfp() strictfp {
 
 define float @test_constant_fold_exp2_f32_neg0_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_exp2_f32_neg0_strictfp(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -0.000000e+00) #[[ATTR16]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -0.000000e+00) #[[ATTR19]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.exp2.f32(float -0.0) strictfp
@@ -6487,7 +6487,7 @@ define float @test_constant_fold_exp2_f32_neg0_strictfp() strictfp {
 
 define float @test_constant_fold_exp2_f32_1_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_exp2_f32_1_strictfp(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 1.000000e+00) #[[ATTR16]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 1.000000e+00) #[[ATTR19]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.exp2.f32(float 1.0) strictfp
@@ -6496,7 +6496,7 @@ define float @test_constant_fold_exp2_f32_1_strictfp() strictfp {
 
 define float @test_constant_fold_exp2_f32_neg1_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_exp2_f32_neg1_strictfp(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+00) #[[ATTR16]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+00) #[[ATTR19]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.exp2.f32(float -1.0) strictfp
@@ -6505,7 +6505,7 @@ define float @test_constant_fold_exp2_f32_neg1_strictfp() strictfp {
 
 define float @test_constant_fold_exp2_f32_2_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_exp2_f32_2_strictfp(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 2.000000e+00) #[[ATTR16]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 2.000000e+00) #[[ATTR19]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.exp2.f32(float 2.0) strictfp
@@ -6514,7 +6514,7 @@ define float @test_constant_fold_exp2_f32_2_strictfp() strictfp {
 
 define float @test_constant_fold_exp2_f32_neg2_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_exp2_f32_neg2_strictfp(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -2.000000e+00) #[[ATTR16]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -2.000000e+00) #[[ATTR19]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.exp2.f32(float -2.0) strictfp
@@ -6523,7 +6523,7 @@ define float @test_constant_fold_exp2_f32_neg2_strictfp() strictfp {
 
 define float @test_constant_fold_exp2_f32_neg_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_exp2_f32_neg_strictfp(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+01) #[[ATTR16]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+01) #[[ATTR19]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.exp2.f32(float -10.0) strictfp
@@ -6571,6 +6571,7 @@ declare i32 @llvm.amdgcn.prng.b32(i32)
 define i32 @prng_undef_i32() {
 ; CHECK-LABEL: @prng_undef_i32(
 ; CHECK-NEXT:    ret i32 undef
+;
   %prng = call i32 @llvm.amdgcn.prng.b32(i32 undef)
   ret i32 %prng
 }
@@ -6578,6 +6579,46 @@ define i32 @prng_undef_i32() {
 define i32 @prng_poison_i32() {
 ; CHECK-LABEL: @prng_poison_i32(
 ; CHECK-NEXT:    ret i32 poison
+;
   %prng = call i32 @llvm.amdgcn.prng.b32(i32 poison)
   ret i32 %prng
 }
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.ds.bpermute
+; --------------------------------------------------------------------
+
+define amdgpu_kernel void @ds_bpermute_uniform_src(ptr addrspace(1) %out, i32 %lane) {
+; CHECK-LABEL: @ds_bpermute_uniform_src(
+; CHECK-NEXT:    store i32 7, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+  %v = call i32 @llvm.amdgcn.ds.bpermute(i32 %lane, i32 7)
+  store i32 %v, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @ds_bpermute_constant_lane(ptr addrspace(1) %out, i32 %src) {
+; CHECK-LABEL: @ds_bpermute_constant_lane(
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[SRC:%.*]], i32 7)
+; CHECK-NEXT:    store i32 [[V]], ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+  %v = call i32 @llvm.amdgcn.ds.bpermute(i32 28, i32 %src)
+  store i32 %v, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @ds_bpermute_uniform_lane(ptr addrspace(1) %out, i32 %lanearg, i32 %src) {
+; CHECK-LABEL: @ds_bpermute_uniform_lane(
+; CHECK-NEXT:    [[LANE:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[LANEARG:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[LANE]], 2
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[SRC:%.*]], i32 [[TMP1]])
+; CHECK-NEXT:    store i32 [[V]], ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+  %lane = call i32 @llvm.amdgcn.readfirstlane(i32 %lanearg)
+  %v = call i32 @llvm.amdgcn.ds.bpermute(i32 %lane, i32 %src)
+  store i32 %v, ptr addrspace(1) %out
+  ret void
+}

arsenm · 2025-03-06T02:16:47Z

llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll

@@ -6496,7 +6496,7 @@ define float @test_constant_fold_exp2_f32_1_strictfp() strictfp {

 define float @test_constant_fold_exp2_f32_neg1_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_exp2_f32_neg1_strictfp(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+00) #[[ATTR16]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+00) #[[ATTR19]]


All the test attribute renumbering is annoying. Does it go away with a regenerate of the base checks?

> All the test attribute renumbering is annoying.

You're telling me! We should switch to printing attributes inline.

> Does it go away with a regenerate of the base checks?

Yes. Done.

llvm-ci · 2025-03-06T14:36:27Z

LLVM Buildbot has detected a new failure on builder amdgpu-offload-ubuntu-22-cmake-build-only running on rocm-docker-ubu-22 while building llvm at step 4 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/203/builds/3742

Here is the relevant piece of the build log for the reference

Step 4 (annotate) failure: '../llvm-zorg/zorg/buildbot/builders/annotated/amdgpu-offload-cmake.py --jobs=32' (failure)
...
[46/60] Building LLVM bitcode Parallelism.cpp-nvptx.bc
[47/60] Building LLVM bitcode Parallelism.cpp-amdgpu.bc
[48/60] Building LLVM bitcode Workshare.cpp-amdgpu.bc
[49/60] Building LLVM bitcode Workshare.cpp-nvptx.bc
[50/60] Linking LLVM bitcode libomptarget-amdgpu.bc
[51/60] Linking LLVM bitcode libomptarget-nvptx.bc
[52/60] Internalizing LLVM bitcode libomptarget-amdgpu.bc
[53/60] Internalizing LLVM bitcode libomptarget-nvptx.bc
/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/opt: WARNING: failed to create target machine for 'nvptx64-nvidia-cuda': unable to get target for 'nvptx64-nvidia-cuda', see --version and --triple.
[54/60] Optimizing LLVM bitcode libomptarget-amdgpu.bc
FAILED: offload/DeviceRTL/libomptarget-amdgpu.bc /home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc 
cd /home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL && /home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/opt -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false /home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc -o /home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc
opt: /home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/llvm-project/llvm/lib/IR/Intrinsics.cpp:157: std::string getIntrinsicNameImpl(llvm::Intrinsic::ID, llvm::ArrayRef<llvm::Type*>, llvm::Module*, llvm::FunctionType*, bool): Assertion `(Tys.empty() || Intrinsic::isOverloaded(Id)) && "This version of getName is for overloaded intrinsics only"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/opt -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false /home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc -o /home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc
1.	Running pass "require<globals-aa>,function(invalidate<aa>),require<profile-summary>,cgscc(devirt<4>(inline,function-attrs<skip-non-recursive-function-attrs>,argpromotion,openmp-opt-cgscc,function(amdgpu-promote-kernel-arguments,infer-address-spaces,amdgpu-lower-kernel-attributes,amdgpu-promote-alloca-to-vector),function<eager-inv;no-rerun>(sroa<modify-cfg>,early-cse<memssa>,speculative-execution<only-if-divergent-target>,jump-threading,correlated-propagation,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine,libcalls-shrinkwrap,amdgpu-usenative,amdgpu-simplifylib,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,reassociate,constraint-elimination,loop-mssa(loop-instsimplify,loop-simplifycfg,licm<no-allowspeculation>,loop-rotate<header-duplication;no-prepare-for-lto>,licm<allowspeculation>,simple-loop-unswitch<nontrivial;trivial>),simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,loop(loop-idiom,indvars,extra-simple-loop-unswitch-passes,loop-deletion,loop-unroll-full),sroa<modify-cfg>,vector-combine,mldst-motion<no-split-footer-bb>,gvn<>,sccp,bdce,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib,jump-threading,correlated-propagation,adce,memcpyopt,dse,move-auto-init,loop-mssa(l
icm<allowspeculation>),coro-elide,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib),function-attrs,function(require<should-not-run-function-passes>),coro-split,coro-annotation-elide)),function(invalidate<should-not-run-function-passes>),cgscc(devirt<4>())" on module "/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc"
2.	Running pass "cgscc(devirt<4>(inline,function-attrs<skip-non-recursive-function-attrs>,argpromotion,openmp-opt-cgscc,function(amdgpu-promote-kernel-arguments,infer-address-spaces,amdgpu-lower-kernel-attributes,amdgpu-promote-alloca-to-vector),function<eager-inv;no-rerun>(sroa<modify-cfg>,early-cse<memssa>,speculative-execution<only-if-divergent-target>,jump-threading,correlated-propagation,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine,libcalls-shrinkwrap,amdgpu-usenative,amdgpu-simplifylib,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,reassociate,constraint-elimination,loop-mssa(loop-instsimplify,loop-simplifycfg,licm<no-allowspeculation>,loop-rotate<header-duplication;no-prepare-for-lto>,licm<allowspeculation>,simple-loop-unswitch<nontrivial;trivial>),simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,loop(loop-idiom,indvars,extra-simple-loop-unswitch-passes,loop-deletion,loop-unroll-full),sroa<modify-cfg>,vector-combine,mldst-motion<no-split-footer-bb>,gvn<>,sccp,bdce,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib,jump-threading,correlated-propagation,adce,memcpyopt,dse,move-auto-init,loop-mssa(licm<allowspeculation>),coro-elide,simplifycfg<bonus-inst-threshold=1;n
o-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib),function-attrs,function(require<should-not-run-function-passes>),coro-split,coro-annotation-elide))" on module "/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc"
3.	Running pass "instcombine<max-iterations=1;no-verify-fixpoint>" on function "ompx_shfl_down_sync_f"
 #0 0x000076d2859e2910 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/libLLVMSupport.so.21.0git+0x1e2910)
 #1 0x000076d2859dfd0f llvm::sys::RunSignalHandlers() (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/libLLVMSupport.so.21.0git+0x1dfd0f)
 #2 0x000076d2859dfe5a SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x000076d285619520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520)
 #4 0x000076d28566d9fc pthread_kill (/lib/x86_64-linux-gnu/libc.so.6+0x969fc)
 #5 0x000076d285619476 gsignal (/lib/x86_64-linux-gnu/libc.so.6+0x42476)
 #6 0x000076d2855ff7f3 abort (/lib/x86_64-linux-gnu/libc.so.6+0x287f3)
 #7 0x000076d2855ff71b (/lib/x86_64-linux-gnu/libc.so.6+0x2871b)
 #8 0x000076d285610e96 (/lib/x86_64-linux-gnu/libc.so.6+0x39e96)
 #9 0x000076d27f83e402 getIntrinsicNameImpl(unsigned int, llvm::ArrayRef<llvm::Type*>, llvm::Module*, llvm::FunctionType*, bool) Intrinsics.cpp:0:0
#10 0x000076d27f83e6a1 llvm::Intrinsic::getOrInsertDeclaration(llvm::Module*, unsigned int, llvm::ArrayRef<llvm::Type*>) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMCore.so.21.0git+0x23e6a1)
#11 0x000076d2843a7aaa llvm::GCNTTIImpl::instCombineIntrinsic(llvm::InstCombiner&, llvm::IntrinsicInst&) const (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMAMDGPUCodeGen.so.21.0git+0x1a7aaa)
#12 0x000076d2826d8c0b llvm::InstCombinerImpl::visitCallInst(llvm::CallInst&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMInstCombine.so.21.0git+0xb4c0b)
#13 0x000076d28267bc57 llvm::InstCombinerImpl::run() (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMInstCombine.so.21.0git+0x57c57)
#14 0x000076d28267d691 combineInstructionsOverFunction(llvm::Function&, llvm::InstructionWorklist&, llvm::AAResults*, llvm::AssumptionCache&, llvm::TargetLibraryInfo&, llvm::TargetTransformInfo&, llvm::DominatorTree&, llvm::OptimizationRemarkEmitter&, llvm::BlockFrequencyInfo*, llvm::BranchProbabilityInfo*, llvm::ProfileSummaryInfo*, llvm::InstCombineOptions const&) InstructionCombining.cpp:0:0
#15 0x000076d28267ead8 llvm::InstCombinePass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMInstCombine.so.21.0git+0x5aad8)
#16 0x000076d282487aa6 llvm::detail::PassModel<llvm::Function, llvm::InstCombinePass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMPasses.so.21.0git+0x87aa6)
#17 0x000076d27f8f998f llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMCore.so.21.0git+0x2f998f)
#18 0x000076d2845bbd26 llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMAMDGPUCodeGen.so.21.0git+0x3bbd26)
#19 0x000076d27fd62944 llvm::CGSCCToFunctionPassAdaptor::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMAnalysis.so.21.0git+0x162944)
#20 0x000076d2845bc876 llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::CGSCCToFunctionPassAdaptor, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMAMDGPUCodeGen.so.21.0git+0x3bc876)
#21 0x000076d27fd5b317 llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMAnalysis.so.21.0git+0x15b317)
#22 0x000076d282486546 llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMPasses.so.21.0git+0x86546)
#23 0x000076d27fd63bfd llvm::DevirtSCCRepeatedPass::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMAnalysis.so.21.0git+0x163bfd)
#24 0x000076d282486596 llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::DevirtSCCRepeatedPass, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMPasses.so.21.0git+0x86596)
#25 0x000076d27fd5e3ce llvm::ModuleToPostOrderCGSCCPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMAnalysis.so.21.0git+0x15e3ce)
#26 0x000076d2824864f6 llvm::detail::PassModel<llvm::Module, llvm::ModuleToPostOrderCGSCCPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMPasses.so.21.0git+0x864f6)
#27 0x000076d27f8f88ed llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMCore.so.21.0git+0x2f88ed)
#28 0x000076d28201505a llvm::ModuleInlinerWrapperPass::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMipo.so.21.0git+0x21505a)
#29 0x000076d282485906 llvm::detail::PassModel<llvm::Module, llvm::ModuleInlinerWrapperPass, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMPasses.so.21.0git+0x85906)
Step 7 (build cmake config) failure: build cmake config (failure)
...
[46/60] Building LLVM bitcode Parallelism.cpp-nvptx.bc
[47/60] Building LLVM bitcode Parallelism.cpp-amdgpu.bc
[48/60] Building LLVM bitcode Workshare.cpp-amdgpu.bc
[49/60] Building LLVM bitcode Workshare.cpp-nvptx.bc
[50/60] Linking LLVM bitcode libomptarget-amdgpu.bc
[51/60] Linking LLVM bitcode libomptarget-nvptx.bc
[52/60] Internalizing LLVM bitcode libomptarget-amdgpu.bc
[53/60] Internalizing LLVM bitcode libomptarget-nvptx.bc
/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/opt: WARNING: failed to create target machine for 'nvptx64-nvidia-cuda': unable to get target for 'nvptx64-nvidia-cuda', see --version and --triple.
[54/60] Optimizing LLVM bitcode libomptarget-amdgpu.bc
FAILED: offload/DeviceRTL/libomptarget-amdgpu.bc /home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc 
cd /home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL && /home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/opt -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false /home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc -o /home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc
opt: /home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/llvm-project/llvm/lib/IR/Intrinsics.cpp:157: std::string getIntrinsicNameImpl(llvm::Intrinsic::ID, llvm::ArrayRef<llvm::Type*>, llvm::Module*, llvm::FunctionType*, bool): Assertion `(Tys.empty() || Intrinsic::isOverloaded(Id)) && "This version of getName is for overloaded intrinsics only"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/opt -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false /home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc -o /home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc
1.	Running pass "require<globals-aa>,function(invalidate<aa>),require<profile-summary>,cgscc(devirt<4>(inline,function-attrs<skip-non-recursive-function-attrs>,argpromotion,openmp-opt-cgscc,function(amdgpu-promote-kernel-arguments,infer-address-spaces,amdgpu-lower-kernel-attributes,amdgpu-promote-alloca-to-vector),function<eager-inv;no-rerun>(sroa<modify-cfg>,early-cse<memssa>,speculative-execution<only-if-divergent-target>,jump-threading,correlated-propagation,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine,libcalls-shrinkwrap,amdgpu-usenative,amdgpu-simplifylib,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,reassociate,constraint-elimination,loop-mssa(loop-instsimplify,loop-simplifycfg,licm<no-allowspeculation>,loop-rotate<header-duplication;no-prepare-for-lto>,licm<allowspeculation>,simple-loop-unswitch<nontrivial;trivial>),simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,loop(loop-idiom,indvars,extra-simple-loop-unswitch-passes,loop-deletion,loop-unroll-full),sroa<modify-cfg>,vector-combine,mldst-motion<no-split-footer-bb>,gvn<>,sccp,bdce,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib,jump-threading,correlated-propagation,adce,memcpyopt,dse,move-auto-init,loop-mssa(l
icm<allowspeculation>),coro-elide,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib),function-attrs,function(require<should-not-run-function-passes>),coro-split,coro-annotation-elide)),function(invalidate<should-not-run-function-passes>),cgscc(devirt<4>())" on module "/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc"
2.	Running pass "cgscc(devirt<4>(inline,function-attrs<skip-non-recursive-function-attrs>,argpromotion,openmp-opt-cgscc,function(amdgpu-promote-kernel-arguments,infer-address-spaces,amdgpu-lower-kernel-attributes,amdgpu-promote-alloca-to-vector),function<eager-inv;no-rerun>(sroa<modify-cfg>,early-cse<memssa>,speculative-execution<only-if-divergent-target>,jump-threading,correlated-propagation,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine,libcalls-shrinkwrap,amdgpu-usenative,amdgpu-simplifylib,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,reassociate,constraint-elimination,loop-mssa(loop-instsimplify,loop-simplifycfg,licm<no-allowspeculation>,loop-rotate<header-duplication;no-prepare-for-lto>,licm<allowspeculation>,simple-loop-unswitch<nontrivial;trivial>),simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,loop(loop-idiom,indvars,extra-simple-loop-unswitch-passes,loop-deletion,loop-unroll-full),sroa<modify-cfg>,vector-combine,mldst-motion<no-split-footer-bb>,gvn<>,sccp,bdce,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib,jump-threading,correlated-propagation,adce,memcpyopt,dse,move-auto-init,loop-mssa(licm<allowspeculation>),coro-elide,simplifycfg<bonus-inst-threshold=1;n
o-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib),function-attrs,function(require<should-not-run-function-passes>),coro-split,coro-annotation-elide))" on module "/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc"
3.	Running pass "instcombine<max-iterations=1;no-verify-fixpoint>" on function "ompx_shfl_down_sync_f"
 #0 0x000076d2859e2910 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/libLLVMSupport.so.21.0git+0x1e2910)
 #1 0x000076d2859dfd0f llvm::sys::RunSignalHandlers() (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/libLLVMSupport.so.21.0git+0x1dfd0f)
 #2 0x000076d2859dfe5a SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x000076d285619520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520)
 #4 0x000076d28566d9fc pthread_kill (/lib/x86_64-linux-gnu/libc.so.6+0x969fc)
 #5 0x000076d285619476 gsignal (/lib/x86_64-linux-gnu/libc.so.6+0x42476)
 #6 0x000076d2855ff7f3 abort (/lib/x86_64-linux-gnu/libc.so.6+0x287f3)
 #7 0x000076d2855ff71b (/lib/x86_64-linux-gnu/libc.so.6+0x2871b)
 #8 0x000076d285610e96 (/lib/x86_64-linux-gnu/libc.so.6+0x39e96)
 #9 0x000076d27f83e402 getIntrinsicNameImpl(unsigned int, llvm::ArrayRef<llvm::Type*>, llvm::Module*, llvm::FunctionType*, bool) Intrinsics.cpp:0:0
#10 0x000076d27f83e6a1 llvm::Intrinsic::getOrInsertDeclaration(llvm::Module*, unsigned int, llvm::ArrayRef<llvm::Type*>) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMCore.so.21.0git+0x23e6a1)
#11 0x000076d2843a7aaa llvm::GCNTTIImpl::instCombineIntrinsic(llvm::InstCombiner&, llvm::IntrinsicInst&) const (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMAMDGPUCodeGen.so.21.0git+0x1a7aaa)
#12 0x000076d2826d8c0b llvm::InstCombinerImpl::visitCallInst(llvm::CallInst&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMInstCombine.so.21.0git+0xb4c0b)
#13 0x000076d28267bc57 llvm::InstCombinerImpl::run() (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMInstCombine.so.21.0git+0x57c57)
#14 0x000076d28267d691 combineInstructionsOverFunction(llvm::Function&, llvm::InstructionWorklist&, llvm::AAResults*, llvm::AssumptionCache&, llvm::TargetLibraryInfo&, llvm::TargetTransformInfo&, llvm::DominatorTree&, llvm::OptimizationRemarkEmitter&, llvm::BlockFrequencyInfo*, llvm::BranchProbabilityInfo*, llvm::ProfileSummaryInfo*, llvm::InstCombineOptions const&) InstructionCombining.cpp:0:0
#15 0x000076d28267ead8 llvm::InstCombinePass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMInstCombine.so.21.0git+0x5aad8)
#16 0x000076d282487aa6 llvm::detail::PassModel<llvm::Function, llvm::InstCombinePass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMPasses.so.21.0git+0x87aa6)
#17 0x000076d27f8f998f llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMCore.so.21.0git+0x2f998f)
#18 0x000076d2845bbd26 llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMAMDGPUCodeGen.so.21.0git+0x3bbd26)
#19 0x000076d27fd62944 llvm::CGSCCToFunctionPassAdaptor::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMAnalysis.so.21.0git+0x162944)
#20 0x000076d2845bc876 llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::CGSCCToFunctionPassAdaptor, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMAMDGPUCodeGen.so.21.0git+0x3bc876)
#21 0x000076d27fd5b317 llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMAnalysis.so.21.0git+0x15b317)
#22 0x000076d282486546 llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMPasses.so.21.0git+0x86546)
#23 0x000076d27fd63bfd llvm::DevirtSCCRepeatedPass::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMAnalysis.so.21.0git+0x163bfd)
#24 0x000076d282486596 llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::DevirtSCCRepeatedPass, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMPasses.so.21.0git+0x86596)
#25 0x000076d27fd5e3ce llvm::ModuleToPostOrderCGSCCPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMAnalysis.so.21.0git+0x15e3ce)
#26 0x000076d2824864f6 llvm::detail::PassModel<llvm::Module, llvm::ModuleToPostOrderCGSCCPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMPasses.so.21.0git+0x864f6)
#27 0x000076d27f8f88ed llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMCore.so.21.0git+0x2f88ed)
#28 0x000076d28201505a llvm::ModuleInlinerWrapperPass::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMipo.so.21.0git+0x21505a)
#29 0x000076d282485906 llvm::detail::PassModel<llvm::Module, llvm::ModuleInlinerWrapperPass, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-ubuntu-22-cmake-build-only/build/bin/../lib/../lib/libLLVMPasses.so.21.0git+0x85906)

llvm-ci · 2025-03-06T14:38:45Z

LLVM Buildbot has detected a new failure on builder amdgpu-offload-rhel-9-cmake-build-only running on rocm-docker-rhel-9 while building llvm at step 4 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/205/builds/2533

Here is the relevant piece of the build log for reference:

Step 4 (annotate) failure: '../llvm-zorg/zorg/buildbot/builders/annotated/amdgpu-offload-cmake.py --jobs=32' (failure)
...
[46/60] Building LLVM bitcode Parallelism.cpp-nvptx.bc
[47/60] Building LLVM bitcode Parallelism.cpp-amdgpu.bc
[48/60] Building LLVM bitcode Workshare.cpp-amdgpu.bc
[49/60] Building LLVM bitcode Workshare.cpp-nvptx.bc
[50/60] Linking LLVM bitcode libomptarget-amdgpu.bc
[51/60] Linking LLVM bitcode libomptarget-nvptx.bc
[52/60] Internalizing LLVM bitcode libomptarget-amdgpu.bc
[53/60] Internalizing LLVM bitcode libomptarget-nvptx.bc
/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/opt: WARNING: failed to create target machine for 'nvptx64-nvidia-cuda': unable to get target for 'nvptx64-nvidia-cuda', see --version and --triple.
[54/60] Optimizing LLVM bitcode libomptarget-amdgpu.bc
FAILED: offload/DeviceRTL/libomptarget-amdgpu.bc /home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc 
cd /home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL && /home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/opt -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false /home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc -o /home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc
opt: /home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/llvm-project/llvm/lib/IR/Intrinsics.cpp:157: std::string getIntrinsicNameImpl(llvm::Intrinsic::ID, llvm::ArrayRef<llvm::Type*>, llvm::Module*, llvm::FunctionType*, bool): Assertion `(Tys.empty() || Intrinsic::isOverloaded(Id)) && "This version of getName is for overloaded intrinsics only"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/opt -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false /home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc -o /home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc
1.	Running pass "require<globals-aa>,function(invalidate<aa>),require<profile-summary>,cgscc(devirt<4>(inline,function-attrs<skip-non-recursive-function-attrs>,argpromotion,openmp-opt-cgscc,function(amdgpu-promote-kernel-arguments,infer-address-spaces,amdgpu-lower-kernel-attributes,amdgpu-promote-alloca-to-vector),function<eager-inv;no-rerun>(sroa<modify-cfg>,early-cse<memssa>,speculative-execution<only-if-divergent-target>,jump-threading,correlated-propagation,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine,libcalls-shrinkwrap,amdgpu-usenative,amdgpu-simplifylib,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,reassociate,constraint-elimination,loop-mssa(loop-instsimplify,loop-simplifycfg,licm<no-allowspeculation>,loop-rotate<header-duplication;no-prepare-for-lto>,licm<allowspeculation>,simple-loop-unswitch<nontrivial;trivial>),simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,loop(loop-idiom,indvars,extra-simple-loop-unswitch-passes,loop-deletion,loop-unroll-full),sroa<modify-cfg>,vector-combine,mldst-motion<no-split-footer-bb>,gvn<>,sccp,bdce,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib,jump-threading,correlated-propagation,adce,memcpyopt,dse,move-auto-init,loop-mssa(l
icm<allowspeculation>),coro-elide,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib),function-attrs,function(require<should-not-run-function-passes>),coro-split,coro-annotation-elide)),function(invalidate<should-not-run-function-passes>),cgscc(devirt<4>())" on module "/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc"
2.	Running pass "cgscc(devirt<4>(inline,function-attrs<skip-non-recursive-function-attrs>,argpromotion,openmp-opt-cgscc,function(amdgpu-promote-kernel-arguments,infer-address-spaces,amdgpu-lower-kernel-attributes,amdgpu-promote-alloca-to-vector),function<eager-inv;no-rerun>(sroa<modify-cfg>,early-cse<memssa>,speculative-execution<only-if-divergent-target>,jump-threading,correlated-propagation,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine,libcalls-shrinkwrap,amdgpu-usenative,amdgpu-simplifylib,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,reassociate,constraint-elimination,loop-mssa(loop-instsimplify,loop-simplifycfg,licm<no-allowspeculation>,loop-rotate<header-duplication;no-prepare-for-lto>,licm<allowspeculation>,simple-loop-unswitch<nontrivial;trivial>),simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,loop(loop-idiom,indvars,extra-simple-loop-unswitch-passes,loop-deletion,loop-unroll-full),sroa<modify-cfg>,vector-combine,mldst-motion<no-split-footer-bb>,gvn<>,sccp,bdce,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib,jump-threading,correlated-propagation,adce,memcpyopt,dse,move-auto-init,loop-mssa(licm<allowspeculation>),coro-elide,simplifycfg<bonus-inst-threshold=1;n
o-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib),function-attrs,function(require<should-not-run-function-passes>),coro-split,coro-annotation-elide))" on module "/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc"
3.	Running pass "instcombine<max-iterations=1;no-verify-fixpoint>" on function "ompx_shfl_down_sync_f"
 #0 0x000071de1a3d00b1 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMSupport.so.21.0git+0x1d00b1)
 #1 0x000071de1a3cd6cb llvm::sys::RunSignalHandlers() (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMSupport.so.21.0git+0x1cd6cb)
 #2 0x000071de1a3cd7f2 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x000071de19e0d730 __restore_rt (/lib64/libc.so.6+0x3e730)
 #4 0x000071de19e5aa6c __pthread_kill_implementation (/lib64/libc.so.6+0x8ba6c)
 #5 0x000071de19e0d686 gsignal (/lib64/libc.so.6+0x3e686)
 #6 0x000071de19df7833 abort (/lib64/libc.so.6+0x28833)
 #7 0x000071de19df775b _nl_load_domain.cold (/lib64/libc.so.6+0x2875b)
 #8 0x000071de19e063c6 (/lib64/libc.so.6+0x373c6)
 #9 0x000071de1a82ddd0 getIntrinsicNameImpl(unsigned int, llvm::ArrayRef<llvm::Type*>, llvm::Module*, llvm::FunctionType*, bool) Intrinsics.cpp:0:0
#10 0x000071de1a82df79 llvm::Intrinsic::getOrInsertDeclaration(llvm::Module*, unsigned int, llvm::ArrayRef<llvm::Type*>) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMCore.so.21.0git+0x22df79)
#11 0x000071de1f797b8d llvm::GCNTTIImpl::instCombineIntrinsic(llvm::InstCombiner&, llvm::IntrinsicInst&) const (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMAMDGPUCodeGen.so.21.0git+0x197b8d)
#12 0x000071de1dadb0ca llvm::InstCombinerImpl::visitCallInst(llvm::CallInst&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMInstCombine.so.21.0git+0xad0ca)
#13 0x000071de1da8026c llvm::InstCombinerImpl::run() (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMInstCombine.so.21.0git+0x5226c)
#14 0x000071de1da81c8d combineInstructionsOverFunction(llvm::Function&, llvm::InstructionWorklist&, llvm::AAResults*, llvm::AssumptionCache&, llvm::TargetLibraryInfo&, llvm::TargetTransformInfo&, llvm::DominatorTree&, llvm::OptimizationRemarkEmitter&, llvm::BlockFrequencyInfo*, llvm::BranchProbabilityInfo*, llvm::ProfileSummaryInfo*, llvm::InstCombineOptions const&) InstructionCombining.cpp:0:0
#15 0x000071de1da82fce llvm::InstCombinePass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMInstCombine.so.21.0git+0x54fce)
#16 0x000071de1d87ee6e llvm::detail::PassModel<llvm::Function, llvm::InstCombinePass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x7ee6e)
#17 0x000071de1a8e375f llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMCore.so.21.0git+0x2e375f)
#18 0x000071de1f99e61e llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMAMDGPUCodeGen.so.21.0git+0x39e61e)
#19 0x000071de1ad57d87 llvm::CGSCCToFunctionPassAdaptor::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMAnalysis.so.21.0git+0x157d87)
#20 0x000071de1f99eb4e llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::CGSCCToFunctionPassAdaptor, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMAMDGPUCodeGen.so.21.0git+0x39eb4e)
#21 0x000071de1ad50923 llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMAnalysis.so.21.0git+0x150923)
#22 0x000071de1d87e53e llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x7e53e)
#23 0x000071de1ad58ff3 llvm::DevirtSCCRepeatedPass::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMAnalysis.so.21.0git+0x158ff3)
#24 0x000071de1d87e55e llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::DevirtSCCRepeatedPass, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x7e55e)
#25 0x000071de1ad537f4 llvm::ModuleToPostOrderCGSCCPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMAnalysis.so.21.0git+0x1537f4)
#26 0x000071de1d87e51e llvm::detail::PassModel<llvm::Module, llvm::ModuleToPostOrderCGSCCPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x7e51e)
#27 0x000071de1a8e26cc llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMCore.so.21.0git+0x2e26cc)
#28 0x000071de1c801ad5 llvm::ModuleInlinerWrapperPass::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMipo.so.21.0git+0x201ad5)
#29 0x000071de1d87df8e llvm::detail::PassModel<llvm::Module, llvm::ModuleInlinerWrapperPass, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x7df8e)
Step 7 (build cmake config) failure: build cmake config (failure)
...
[46/60] Building LLVM bitcode Parallelism.cpp-nvptx.bc
[47/60] Building LLVM bitcode Parallelism.cpp-amdgpu.bc
[48/60] Building LLVM bitcode Workshare.cpp-amdgpu.bc
[49/60] Building LLVM bitcode Workshare.cpp-nvptx.bc
[50/60] Linking LLVM bitcode libomptarget-amdgpu.bc
[51/60] Linking LLVM bitcode libomptarget-nvptx.bc
[52/60] Internalizing LLVM bitcode libomptarget-amdgpu.bc
[53/60] Internalizing LLVM bitcode libomptarget-nvptx.bc
/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/opt: WARNING: failed to create target machine for 'nvptx64-nvidia-cuda': unable to get target for 'nvptx64-nvidia-cuda', see --version and --triple.
[54/60] Optimizing LLVM bitcode libomptarget-amdgpu.bc
FAILED: offload/DeviceRTL/libomptarget-amdgpu.bc /home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc 
cd /home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL && /home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/opt -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false /home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc -o /home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc
opt: /home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/llvm-project/llvm/lib/IR/Intrinsics.cpp:157: std::string getIntrinsicNameImpl(llvm::Intrinsic::ID, llvm::ArrayRef<llvm::Type*>, llvm::Module*, llvm::FunctionType*, bool): Assertion `(Tys.empty() || Intrinsic::isOverloaded(Id)) && "This version of getName is for overloaded intrinsics only"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/opt -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false /home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc -o /home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc
1.	Running pass "require<globals-aa>,function(invalidate<aa>),require<profile-summary>,cgscc(devirt<4>(inline,function-attrs<skip-non-recursive-function-attrs>,argpromotion,openmp-opt-cgscc,function(amdgpu-promote-kernel-arguments,infer-address-spaces,amdgpu-lower-kernel-attributes,amdgpu-promote-alloca-to-vector),function<eager-inv;no-rerun>(sroa<modify-cfg>,early-cse<memssa>,speculative-execution<only-if-divergent-target>,jump-threading,correlated-propagation,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine,libcalls-shrinkwrap,amdgpu-usenative,amdgpu-simplifylib,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,reassociate,constraint-elimination,loop-mssa(loop-instsimplify,loop-simplifycfg,licm<no-allowspeculation>,loop-rotate<header-duplication;no-prepare-for-lto>,licm<allowspeculation>,simple-loop-unswitch<nontrivial;trivial>),simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,loop(loop-idiom,indvars,extra-simple-loop-unswitch-passes,loop-deletion,loop-unroll-full),sroa<modify-cfg>,vector-combine,mldst-motion<no-split-footer-bb>,gvn<>,sccp,bdce,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib,jump-threading,correlated-propagation,adce,memcpyopt,dse,move-auto-init,loop-mssa(l
icm<allowspeculation>),coro-elide,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib),function-attrs,function(require<should-not-run-function-passes>),coro-split,coro-annotation-elide)),function(invalidate<should-not-run-function-passes>),cgscc(devirt<4>())" on module "/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc"
2.	Running pass "cgscc(devirt<4>(inline,function-attrs<skip-non-recursive-function-attrs>,argpromotion,openmp-opt-cgscc,function(amdgpu-promote-kernel-arguments,infer-address-spaces,amdgpu-lower-kernel-attributes,amdgpu-promote-alloca-to-vector),function<eager-inv;no-rerun>(sroa<modify-cfg>,early-cse<memssa>,speculative-execution<only-if-divergent-target>,jump-threading,correlated-propagation,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine,libcalls-shrinkwrap,amdgpu-usenative,amdgpu-simplifylib,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,reassociate,constraint-elimination,loop-mssa(loop-instsimplify,loop-simplifycfg,licm<no-allowspeculation>,loop-rotate<header-duplication;no-prepare-for-lto>,licm<allowspeculation>,simple-loop-unswitch<nontrivial;trivial>),simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,loop(loop-idiom,indvars,extra-simple-loop-unswitch-passes,loop-deletion,loop-unroll-full),sroa<modify-cfg>,vector-combine,mldst-motion<no-split-footer-bb>,gvn<>,sccp,bdce,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib,jump-threading,correlated-propagation,adce,memcpyopt,dse,move-auto-init,loop-mssa(licm<allowspeculation>),coro-elide,simplifycfg<bonus-inst-threshold=1;n
o-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib),function-attrs,function(require<should-not-run-function-passes>),coro-split,coro-annotation-elide))" on module "/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc"
3.	Running pass "instcombine<max-iterations=1;no-verify-fixpoint>" on function "ompx_shfl_down_sync_f"
 #0 0x000071de1a3d00b1 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMSupport.so.21.0git+0x1d00b1)
 #1 0x000071de1a3cd6cb llvm::sys::RunSignalHandlers() (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMSupport.so.21.0git+0x1cd6cb)
 #2 0x000071de1a3cd7f2 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x000071de19e0d730 __restore_rt (/lib64/libc.so.6+0x3e730)
 #4 0x000071de19e5aa6c __pthread_kill_implementation (/lib64/libc.so.6+0x8ba6c)
 #5 0x000071de19e0d686 gsignal (/lib64/libc.so.6+0x3e686)
 #6 0x000071de19df7833 abort (/lib64/libc.so.6+0x28833)
 #7 0x000071de19df775b _nl_load_domain.cold (/lib64/libc.so.6+0x2875b)
 #8 0x000071de19e063c6 (/lib64/libc.so.6+0x373c6)
 #9 0x000071de1a82ddd0 getIntrinsicNameImpl(unsigned int, llvm::ArrayRef<llvm::Type*>, llvm::Module*, llvm::FunctionType*, bool) Intrinsics.cpp:0:0
#10 0x000071de1a82df79 llvm::Intrinsic::getOrInsertDeclaration(llvm::Module*, unsigned int, llvm::ArrayRef<llvm::Type*>) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMCore.so.21.0git+0x22df79)
#11 0x000071de1f797b8d llvm::GCNTTIImpl::instCombineIntrinsic(llvm::InstCombiner&, llvm::IntrinsicInst&) const (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMAMDGPUCodeGen.so.21.0git+0x197b8d)
#12 0x000071de1dadb0ca llvm::InstCombinerImpl::visitCallInst(llvm::CallInst&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMInstCombine.so.21.0git+0xad0ca)
#13 0x000071de1da8026c llvm::InstCombinerImpl::run() (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMInstCombine.so.21.0git+0x5226c)
#14 0x000071de1da81c8d combineInstructionsOverFunction(llvm::Function&, llvm::InstructionWorklist&, llvm::AAResults*, llvm::AssumptionCache&, llvm::TargetLibraryInfo&, llvm::TargetTransformInfo&, llvm::DominatorTree&, llvm::OptimizationRemarkEmitter&, llvm::BlockFrequencyInfo*, llvm::BranchProbabilityInfo*, llvm::ProfileSummaryInfo*, llvm::InstCombineOptions const&) InstructionCombining.cpp:0:0
#15 0x000071de1da82fce llvm::InstCombinePass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMInstCombine.so.21.0git+0x54fce)
#16 0x000071de1d87ee6e llvm::detail::PassModel<llvm::Function, llvm::InstCombinePass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x7ee6e)
#17 0x000071de1a8e375f llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMCore.so.21.0git+0x2e375f)
#18 0x000071de1f99e61e llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMAMDGPUCodeGen.so.21.0git+0x39e61e)
#19 0x000071de1ad57d87 llvm::CGSCCToFunctionPassAdaptor::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMAnalysis.so.21.0git+0x157d87)
#20 0x000071de1f99eb4e llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::CGSCCToFunctionPassAdaptor, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMAMDGPUCodeGen.so.21.0git+0x39eb4e)
#21 0x000071de1ad50923 llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMAnalysis.so.21.0git+0x150923)
#22 0x000071de1d87e53e llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x7e53e)
#23 0x000071de1ad58ff3 llvm::DevirtSCCRepeatedPass::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMAnalysis.so.21.0git+0x158ff3)
#24 0x000071de1d87e55e llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::DevirtSCCRepeatedPass, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x7e55e)
#25 0x000071de1ad537f4 llvm::ModuleToPostOrderCGSCCPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMAnalysis.so.21.0git+0x1537f4)
#26 0x000071de1d87e51e llvm::detail::PassModel<llvm::Module, llvm::ModuleToPostOrderCGSCCPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x7e51e)
#27 0x000071de1a8e26cc llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMCore.so.21.0git+0x2e26cc)
#28 0x000071de1c801ad5 llvm::ModuleInlinerWrapperPass::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMipo.so.21.0git+0x201ad5)
#29 0x000071de1d87df8e llvm::detail::PassModel<llvm::Module, llvm::ModuleInlinerWrapperPass, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-9-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x7df8e)

llvm-ci · 2025-03-06T14:39:08Z

LLVM Buildbot has detected a new failure on builder amdgpu-offload-rhel-8-cmake-build-only running on rocm-docker-rhel-8 while building llvm at step 4 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/204/builds/2554

Here is the relevant piece of the build log for reference:

Step 4 (annotate) failure: '../llvm-zorg/zorg/buildbot/builders/annotated/amdgpu-offload-cmake.py --jobs=32' (failure)
...
[46/60] Building LLVM bitcode Workshare.cpp-nvptx.bc
[47/60] Building LLVM bitcode Parallelism.cpp-amdgpu.bc
[48/60] Building LLVM bitcode Workshare.cpp-amdgpu.bc
[49/60] Building LLVM bitcode Parallelism.cpp-nvptx.bc
[50/60] Linking LLVM bitcode libomptarget-amdgpu.bc
[51/60] Linking LLVM bitcode libomptarget-nvptx.bc
[52/60] Internalizing LLVM bitcode libomptarget-amdgpu.bc
[53/60] Internalizing LLVM bitcode libomptarget-nvptx.bc
/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/opt: WARNING: failed to create target machine for 'nvptx64-nvidia-cuda': unable to get target for 'nvptx64-nvidia-cuda', see --version and --triple.
[54/60] Optimizing LLVM bitcode libomptarget-amdgpu.bc
FAILED: offload/DeviceRTL/libomptarget-amdgpu.bc /home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc 
cd /home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL && /home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/opt -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false /home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc -o /home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc
opt: /home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/llvm-project/llvm/lib/IR/Intrinsics.cpp:158: std::__cxx11::string getIntrinsicNameImpl(llvm::Intrinsic::ID, llvm::ArrayRef<llvm::Type*>, llvm::Module*, llvm::FunctionType*, bool): Assertion `(Tys.empty() || Intrinsic::isOverloaded(Id)) && "This version of getName is for overloaded intrinsics only"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/opt -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false /home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc -o /home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc
1.	Running pass "require<globals-aa>,function(invalidate<aa>),require<profile-summary>,cgscc(devirt<4>(inline,function-attrs<skip-non-recursive-function-attrs>,argpromotion,openmp-opt-cgscc,function(amdgpu-promote-kernel-arguments,infer-address-spaces,amdgpu-lower-kernel-attributes,amdgpu-promote-alloca-to-vector),function<eager-inv;no-rerun>(sroa<modify-cfg>,early-cse<memssa>,speculative-execution<only-if-divergent-target>,jump-threading,correlated-propagation,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine,libcalls-shrinkwrap,amdgpu-usenative,amdgpu-simplifylib,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,reassociate,constraint-elimination,loop-mssa(loop-instsimplify,loop-simplifycfg,licm<no-allowspeculation>,loop-rotate<header-duplication;no-prepare-for-lto>,licm<allowspeculation>,simple-loop-unswitch<nontrivial;trivial>),simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,loop(loop-idiom,indvars,extra-simple-loop-unswitch-passes,loop-deletion,loop-unroll-full),sroa<modify-cfg>,vector-combine,mldst-motion<no-split-footer-bb>,gvn<>,sccp,bdce,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib,jump-threading,correlated-propagation,adce,memcpyopt,dse,move-auto-init,loop-mssa(l
icm<allowspeculation>),coro-elide,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib),function-attrs,function(require<should-not-run-function-passes>),coro-split,coro-annotation-elide)),function(invalidate<should-not-run-function-passes>),cgscc(devirt<4>())" on module "/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc"
2.	Running pass "cgscc(devirt<4>(inline,function-attrs<skip-non-recursive-function-attrs>,argpromotion,openmp-opt-cgscc,function(amdgpu-promote-kernel-arguments,infer-address-spaces,amdgpu-lower-kernel-attributes,amdgpu-promote-alloca-to-vector),function<eager-inv;no-rerun>(sroa<modify-cfg>,early-cse<memssa>,speculative-execution<only-if-divergent-target>,jump-threading,correlated-propagation,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine,libcalls-shrinkwrap,amdgpu-usenative,amdgpu-simplifylib,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,reassociate,constraint-elimination,loop-mssa(loop-instsimplify,loop-simplifycfg,licm<no-allowspeculation>,loop-rotate<header-duplication;no-prepare-for-lto>,licm<allowspeculation>,simple-loop-unswitch<nontrivial;trivial>),simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,loop(loop-idiom,indvars,extra-simple-loop-unswitch-passes,loop-deletion,loop-unroll-full),sroa<modify-cfg>,vector-combine,mldst-motion<no-split-footer-bb>,gvn<>,sccp,bdce,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib,jump-threading,correlated-propagation,adce,memcpyopt,dse,move-auto-init,loop-mssa(licm<allowspeculation>),coro-elide,simplifycfg<bonus-inst-threshold=1;n
o-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib),function-attrs,function(require<should-not-run-function-passes>),coro-split,coro-annotation-elide))" on module "/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc"
3.	Running pass "instcombine<max-iterations=1;no-verify-fixpoint>" on function "ompx_shfl_down_sync_f"
 #0 0x00007107f43d4a68 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMSupport.so.21.0git+0x1d4a68)
 #1 0x00007107f43d214c SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #2 0x0000710801b5fd10 __restore_rt (/lib64/libpthread.so.0+0x12d10)
 #3 0x00007107f354952f raise (/lib64/libc.so.6+0x4e52f)
 #4 0x00007107f351ce65 abort (/lib64/libc.so.6+0x21e65)
 #5 0x00007107f351cd39 _nl_load_domain.cold.0 (/lib64/libc.so.6+0x21d39)
 #6 0x00007107f3541e86 (/lib64/libc.so.6+0x46e86)
 #7 0x00007107f521430e getIntrinsicNameImpl(unsigned int, llvm::ArrayRef<llvm::Type*>, llvm::Module*, llvm::FunctionType*, bool) Intrinsics.cpp:0:0
 #8 0x00007107f52144c2 llvm::Intrinsic::getOrInsertDeclaration(llvm::Module*, unsigned int, llvm::ArrayRef<llvm::Type*>) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMCore.so.21.0git+0x2144c2)
 #9 0x00007107ffda33d2 llvm::GCNTTIImpl::instCombineIntrinsic(llvm::InstCombiner&, llvm::IntrinsicInst&) const (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMAMDGPUCodeGen.so.21.0git+0x1a33d2)
#10 0x00007107f80a1745 llvm::InstCombinerImpl::visitCallInst(llvm::CallInst&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMInstCombine.so.21.0git+0xa1745)
#11 0x00007107f804e7f1 llvm::InstCombinerImpl::run() (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMInstCombine.so.21.0git+0x4e7f1)
#12 0x00007107f8050080 combineInstructionsOverFunction(llvm::Function&, llvm::InstructionWorklist&, llvm::AAResults*, llvm::AssumptionCache&, llvm::TargetLibraryInfo&, llvm::TargetTransformInfo&, llvm::DominatorTree&, llvm::OptimizationRemarkEmitter&, llvm::BlockFrequencyInfo*, llvm::BranchProbabilityInfo*, llvm::ProfileSummaryInfo*, llvm::InstCombineOptions const&) (.isra.2065) InstructionCombining.cpp:0:0
#13 0x00007107f8050b24 llvm::InstCombinePass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMInstCombine.so.21.0git+0x50b24)
#14 0x00007107fc083bdd llvm::detail::PassModel<llvm::Function, llvm::InstCombinePass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x83bdd)
#15 0x00007107f52c8a7c llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMCore.so.21.0git+0x2c8a7c)
#16 0x00007107fff986cd llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMAMDGPUCodeGen.so.21.0git+0x3986cd)
#17 0x00007107f6562a6f llvm::CGSCCToFunctionPassAdaptor::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMAnalysis.so.21.0git+0x162a6f)
#18 0x00007107fff98b3d llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::CGSCCToFunctionPassAdaptor, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMAMDGPUCodeGen.so.21.0git+0x398b3d)
#19 0x00007107f655aa9a llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMAnalysis.so.21.0git+0x15aa9a)
#20 0x00007107fc0832ad llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x832ad)
#21 0x00007107f655f8f7 llvm::DevirtSCCRepeatedPass::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMAnalysis.so.21.0git+0x15f8f7)
#22 0x00007107fc0832cd llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::DevirtSCCRepeatedPass, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x832cd)
#23 0x00007107f655d863 llvm::ModuleToPostOrderCGSCCPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMAnalysis.so.21.0git+0x15d863)
#24 0x00007107fc08328d llvm::detail::PassModel<llvm::Module, llvm::ModuleToPostOrderCGSCCPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x8328d)
#25 0x00007107f52c72a4 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMCore.so.21.0git+0x2c72a4)
#26 0x00007107fa3f51e3 llvm::ModuleInlinerWrapperPass::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMipo.so.21.0git+0x1f51e3)
#27 0x00007107fc082cfd llvm::detail::PassModel<llvm::Module, llvm::ModuleInlinerWrapperPass, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x82cfd)
#28 0x00007107f52c72a4 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMCore.so.21.0git+0x2c72a4)
#29 0x0000710801820e8f llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMOptDriver.so.21.0git+0x20e8f)
Step 7 (build cmake config) failure: build cmake config (failure)
...
[46/60] Building LLVM bitcode Workshare.cpp-nvptx.bc
[47/60] Building LLVM bitcode Parallelism.cpp-amdgpu.bc
[48/60] Building LLVM bitcode Workshare.cpp-amdgpu.bc
[49/60] Building LLVM bitcode Parallelism.cpp-nvptx.bc
[50/60] Linking LLVM bitcode libomptarget-amdgpu.bc
[51/60] Linking LLVM bitcode libomptarget-nvptx.bc
[52/60] Internalizing LLVM bitcode libomptarget-amdgpu.bc
[53/60] Internalizing LLVM bitcode libomptarget-nvptx.bc
/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/opt: WARNING: failed to create target machine for 'nvptx64-nvidia-cuda': unable to get target for 'nvptx64-nvidia-cuda', see --version and --triple.
[54/60] Optimizing LLVM bitcode libomptarget-amdgpu.bc
FAILED: offload/DeviceRTL/libomptarget-amdgpu.bc /home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc 
cd /home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL && /home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/opt -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false /home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc -o /home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc
opt: /home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/llvm-project/llvm/lib/IR/Intrinsics.cpp:158: std::__cxx11::string getIntrinsicNameImpl(llvm::Intrinsic::ID, llvm::ArrayRef<llvm::Type*>, llvm::Module*, llvm::FunctionType*, bool): Assertion `(Tys.empty() || Intrinsic::isOverloaded(Id)) && "This version of getName is for overloaded intrinsics only"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/opt -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false /home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc -o /home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc
1.	Running pass "require<globals-aa>,function(invalidate<aa>),require<profile-summary>,cgscc(devirt<4>(inline,function-attrs<skip-non-recursive-function-attrs>,argpromotion,openmp-opt-cgscc,function(amdgpu-promote-kernel-arguments,infer-address-spaces,amdgpu-lower-kernel-attributes,amdgpu-promote-alloca-to-vector),function<eager-inv;no-rerun>(sroa<modify-cfg>,early-cse<memssa>,speculative-execution<only-if-divergent-target>,jump-threading,correlated-propagation,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine,libcalls-shrinkwrap,amdgpu-usenative,amdgpu-simplifylib,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,reassociate,constraint-elimination,loop-mssa(loop-instsimplify,loop-simplifycfg,licm<no-allowspeculation>,loop-rotate<header-duplication;no-prepare-for-lto>,licm<allowspeculation>,simple-loop-unswitch<nontrivial;trivial>),simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,loop(loop-idiom,indvars,extra-simple-loop-unswitch-passes,loop-deletion,loop-unroll-full),sroa<modify-cfg>,vector-combine,mldst-motion<no-split-footer-bb>,gvn<>,sccp,bdce,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib,jump-threading,correlated-propagation,adce,memcpyopt,dse,move-auto-init,loop-mssa(l
icm<allowspeculation>),coro-elide,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib),function-attrs,function(require<should-not-run-function-passes>),coro-split,coro-annotation-elide)),function(invalidate<should-not-run-function-passes>),cgscc(devirt<4>())" on module "/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc"
2.	Running pass "cgscc(devirt<4>(inline,function-attrs<skip-non-recursive-function-attrs>,argpromotion,openmp-opt-cgscc,function(amdgpu-promote-kernel-arguments,infer-address-spaces,amdgpu-lower-kernel-attributes,amdgpu-promote-alloca-to-vector),function<eager-inv;no-rerun>(sroa<modify-cfg>,early-cse<memssa>,speculative-execution<only-if-divergent-target>,jump-threading,correlated-propagation,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine,libcalls-shrinkwrap,amdgpu-usenative,amdgpu-simplifylib,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,reassociate,constraint-elimination,loop-mssa(loop-instsimplify,loop-simplifycfg,licm<no-allowspeculation>,loop-rotate<header-duplication;no-prepare-for-lto>,licm<allowspeculation>,simple-loop-unswitch<nontrivial;trivial>),simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,loop(loop-idiom,indvars,extra-simple-loop-unswitch-passes,loop-deletion,loop-unroll-full),sroa<modify-cfg>,vector-combine,mldst-motion<no-split-footer-bb>,gvn<>,sccp,bdce,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib,jump-threading,correlated-propagation,adce,memcpyopt,dse,move-auto-init,loop-mssa(licm<allowspeculation>),coro-elide,simplifycfg<bonus-inst-threshold=1;n
o-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib),function-attrs,function(require<should-not-run-function-passes>),coro-split,coro-annotation-elide))" on module "/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc"
3.	Running pass "instcombine<max-iterations=1;no-verify-fixpoint>" on function "ompx_shfl_down_sync_f"
 #0 0x00007107f43d4a68 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMSupport.so.21.0git+0x1d4a68)
 #1 0x00007107f43d214c SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #2 0x0000710801b5fd10 __restore_rt (/lib64/libpthread.so.0+0x12d10)
 #3 0x00007107f354952f raise (/lib64/libc.so.6+0x4e52f)
 #4 0x00007107f351ce65 abort (/lib64/libc.so.6+0x21e65)
 #5 0x00007107f351cd39 _nl_load_domain.cold.0 (/lib64/libc.so.6+0x21d39)
 #6 0x00007107f3541e86 (/lib64/libc.so.6+0x46e86)
 #7 0x00007107f521430e getIntrinsicNameImpl(unsigned int, llvm::ArrayRef<llvm::Type*>, llvm::Module*, llvm::FunctionType*, bool) Intrinsics.cpp:0:0
 #8 0x00007107f52144c2 llvm::Intrinsic::getOrInsertDeclaration(llvm::Module*, unsigned int, llvm::ArrayRef<llvm::Type*>) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMCore.so.21.0git+0x2144c2)
 #9 0x00007107ffda33d2 llvm::GCNTTIImpl::instCombineIntrinsic(llvm::InstCombiner&, llvm::IntrinsicInst&) const (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMAMDGPUCodeGen.so.21.0git+0x1a33d2)
#10 0x00007107f80a1745 llvm::InstCombinerImpl::visitCallInst(llvm::CallInst&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMInstCombine.so.21.0git+0xa1745)
#11 0x00007107f804e7f1 llvm::InstCombinerImpl::run() (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMInstCombine.so.21.0git+0x4e7f1)
#12 0x00007107f8050080 combineInstructionsOverFunction(llvm::Function&, llvm::InstructionWorklist&, llvm::AAResults*, llvm::AssumptionCache&, llvm::TargetLibraryInfo&, llvm::TargetTransformInfo&, llvm::DominatorTree&, llvm::OptimizationRemarkEmitter&, llvm::BlockFrequencyInfo*, llvm::BranchProbabilityInfo*, llvm::ProfileSummaryInfo*, llvm::InstCombineOptions const&) (.isra.2065) InstructionCombining.cpp:0:0
#13 0x00007107f8050b24 llvm::InstCombinePass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMInstCombine.so.21.0git+0x50b24)
#14 0x00007107fc083bdd llvm::detail::PassModel<llvm::Function, llvm::InstCombinePass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x83bdd)
#15 0x00007107f52c8a7c llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMCore.so.21.0git+0x2c8a7c)
#16 0x00007107fff986cd llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMAMDGPUCodeGen.so.21.0git+0x3986cd)
#17 0x00007107f6562a6f llvm::CGSCCToFunctionPassAdaptor::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMAnalysis.so.21.0git+0x162a6f)
#18 0x00007107fff98b3d llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::CGSCCToFunctionPassAdaptor, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMAMDGPUCodeGen.so.21.0git+0x398b3d)
#19 0x00007107f655aa9a llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMAnalysis.so.21.0git+0x15aa9a)
#20 0x00007107fc0832ad llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x832ad)
#21 0x00007107f655f8f7 llvm::DevirtSCCRepeatedPass::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMAnalysis.so.21.0git+0x15f8f7)
#22 0x00007107fc0832cd llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::DevirtSCCRepeatedPass, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x832cd)
#23 0x00007107f655d863 llvm::ModuleToPostOrderCGSCCPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMAnalysis.so.21.0git+0x15d863)
#24 0x00007107fc08328d llvm::detail::PassModel<llvm::Module, llvm::ModuleToPostOrderCGSCCPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x8328d)
#25 0x00007107f52c72a4 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMCore.so.21.0git+0x2c72a4)
#26 0x00007107fa3f51e3 llvm::ModuleInlinerWrapperPass::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMipo.so.21.0git+0x1f51e3)
#27 0x00007107fc082cfd llvm::detail::PassModel<llvm::Module, llvm::ModuleInlinerWrapperPass, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMPasses.so.21.0git+0x82cfd)
#28 0x00007107f52c72a4 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMCore.so.21.0git+0x2c72a4)
#29 0x0000710801820e8f llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool) (/home/botworker/bbot/amdgpu-offload-rhel-8-cmake-build-only/build/bin/../lib/libLLVMOptDriver.so.21.0git+0x20e8f)

llvm-ci · 2025-03-06T14:41:21Z

LLVM Buildbot has detected a new failure on builder openmp-offload-amdgpu-runtime running on omp-vega20-0 while building llvm at step 5 "compile-openmp".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/30/builds/17032

Here is the relevant piece of the build log for the reference

Step 5 (compile-openmp) failure: build (failure)
...
24.414 [40/34/1293] Generating exported symbols for clang_rt.tsan-x86_64
24.884 [39/34/1294] Building CXX object openmp/runtime/src/CMakeFiles/omp.dir/kmp_affinity.cpp.o
24.952 [38/34/1295] Building CXX object compiler-rt/lib/tsan/rtl/CMakeFiles/clang_rt.tsan-dynamic-x86_64.dir/tsan_interceptors_posix.cpp.o
25.019 [37/34/1296] Building CXX object compiler-rt/lib/memprof/CMakeFiles/RTMemprof_dynamic.x86_64.dir/memprof_interceptors.cpp.o
25.094 [36/34/1297] Linking C shared library openmp/runtime/src/libomp.so
25.117 [35/34/1298] Linking CXX shared library /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/lib/clang/21/lib/x86_64-unknown-linux-gnu/libclang_rt.tsan.so
25.252 [34/34/1299] Linking CXX shared library openmp/libompd/src/libompd.so
25.976 [33/34/1300] Building CXX object compiler-rt/lib/memprof/CMakeFiles/RTMemprof.x86_64.dir/memprof_interceptors.cpp.o
26.070 [32/34/1301] Linking CXX static library /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/lib/clang/21/lib/x86_64-unknown-linux-gnu/libclang_rt.memprof.a
26.087 [31/34/1302] Optimizing LLVM bitcode libomptarget-amdgpu.bc
FAILED: offload/DeviceRTL/libomptarget-amdgpu.bc /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc 
cd /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/DeviceRTL && /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc -o /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc
opt: /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/llvm/lib/IR/Intrinsics.cpp:157: std::string getIntrinsicNameImpl(llvm::Intrinsic::ID, llvm::ArrayRef<llvm::Type*>, llvm::Module*, llvm::FunctionType*, bool): Assertion `(Tys.empty() || Intrinsic::isOverloaded(Id)) && "This version of getName is for overloaded intrinsics only"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc -o /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/DeviceRTL/libomptarget-amdgpu.bc
1.	Running pass "require<globals-aa>,function(invalidate<aa>),require<profile-summary>,cgscc(devirt<4>(inline,function-attrs<skip-non-recursive-function-attrs>,argpromotion,openmp-opt-cgscc,function(amdgpu-promote-kernel-arguments,infer-address-spaces,amdgpu-lower-kernel-attributes,amdgpu-promote-alloca-to-vector),function<eager-inv;no-rerun>(sroa<modify-cfg>,early-cse<memssa>,speculative-execution<only-if-divergent-target>,jump-threading,correlated-propagation,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine,libcalls-shrinkwrap,amdgpu-usenative,amdgpu-simplifylib,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,reassociate,constraint-elimination,loop-mssa(loop-instsimplify,loop-simplifycfg,licm<no-allowspeculation>,loop-rotate<header-duplication;no-prepare-for-lto>,licm<allowspeculation>,simple-loop-unswitch<nontrivial;trivial>),simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,loop(loop-idiom,indvars,extra-simple-loop-unswitch-passes,loop-deletion,loop-unroll-full),sroa<modify-cfg>,vector-combine,mldst-motion<no-split-footer-bb>,gvn<>,sccp,bdce,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib,jump-threading,correlated-propagation,adce,memcpyopt,dse,move-auto-init,loop-mssa(l
icm<allowspeculation>),coro-elide,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib),function-attrs,function(require<should-not-run-function-passes>),coro-split,coro-annotation-elide)),function(invalidate<should-not-run-function-passes>),cgscc(devirt<4>())" on module "/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc"
2.	Running pass "cgscc(devirt<4>(inline,function-attrs<skip-non-recursive-function-attrs>,argpromotion,openmp-opt-cgscc,function(amdgpu-promote-kernel-arguments,infer-address-spaces,amdgpu-lower-kernel-attributes,amdgpu-promote-alloca-to-vector),function<eager-inv;no-rerun>(sroa<modify-cfg>,early-cse<memssa>,speculative-execution<only-if-divergent-target>,jump-threading,correlated-propagation,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine,libcalls-shrinkwrap,amdgpu-usenative,amdgpu-simplifylib,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,reassociate,constraint-elimination,loop-mssa(loop-instsimplify,loop-simplifycfg,licm<no-allowspeculation>,loop-rotate<header-duplication;no-prepare-for-lto>,licm<allowspeculation>,simple-loop-unswitch<nontrivial;trivial>),simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,loop(loop-idiom,indvars,extra-simple-loop-unswitch-passes,loop-deletion,loop-unroll-full),sroa<modify-cfg>,vector-combine,mldst-motion<no-split-footer-bb>,gvn<>,sccp,bdce,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib,jump-threading,correlated-propagation,adce,memcpyopt,dse,move-auto-init,loop-mssa(licm<allowspeculation>),coro-elide,simplifycfg<bonus-inst-threshold=1;n
o-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,amdgpu-usenative,amdgpu-simplifylib),function-attrs,function(require<should-not-run-function-passes>),coro-split,coro-annotation-elide))" on module "/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/DeviceRTL/internalized_libomptarget-amdgpu.bc"
3.	Running pass "instcombine<max-iterations=1;no-verify-fixpoint>" on function "ompx_shfl_down_sync_f"
 #0 0x0000560a6fb4bf3f llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x3b66f3f)
 #1 0x0000560a6fb49464 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #2 0x00007f67404db420 __restore_rt (/lib/x86_64-linux-gnu/libpthread.so.0+0x14420)
 #3 0x00007f673ffae00b raise (/lib/x86_64-linux-gnu/libc.so.6+0x4300b)
 #4 0x00007f673ff8d859 abort (/lib/x86_64-linux-gnu/libc.so.6+0x22859)
 #5 0x00007f673ff8d729 (/lib/x86_64-linux-gnu/libc.so.6+0x22729)
 #6 0x00007f673ff9efd6 (/lib/x86_64-linux-gnu/libc.so.6+0x33fd6)
 #7 0x0000560a6f886a92 getIntrinsicNameImpl(unsigned int, llvm::ArrayRef<llvm::Type*>, llvm::Module*, llvm::FunctionType*, bool) Intrinsics.cpp:0:0
 #8 0x0000560a6f886d29 llvm::Intrinsic::getOrInsertDeclaration(llvm::Module*, unsigned int, llvm::ArrayRef<llvm::Type*>) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x38a1d29)
 #9 0x0000560a6cdec8c0 llvm::GCNTTIImpl::instCombineIntrinsic(llvm::InstCombiner&, llvm::IntrinsicInst&) const (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0xe078c0)
#10 0x0000560a6e909878 llvm::InstCombinerImpl::visitCallInst(llvm::CallInst&) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x2924878)
#11 0x0000560a6e8aeee0 llvm::InstCombinerImpl::run() (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x28c9ee0)
#12 0x0000560a6e8b08b6 combineInstructionsOverFunction(llvm::Function&, llvm::InstructionWorklist&, llvm::AAResults*, llvm::AssumptionCache&, llvm::TargetLibraryInfo&, llvm::TargetTransformInfo&, llvm::DominatorTree&, llvm::OptimizationRemarkEmitter&, llvm::BlockFrequencyInfo*, llvm::BranchProbabilityInfo*, llvm::ProfileSummaryInfo*, llvm::InstCombineOptions const&) (.isra.0) InstructionCombining.cpp:0:0
#13 0x0000560a6e8b1dd1 llvm::InstCombinePass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x28ccdd1)
#14 0x0000560a6d78c0e6 llvm::detail::PassModel<llvm::Function, llvm::InstCombinePass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x17a70e6)
#15 0x0000560a6f93f049 llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x395a049)
#16 0x0000560a6c77b1d6 llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x7961d6)
#17 0x0000560a6ef50fca llvm::CGSCCToFunctionPassAdaptor::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x2f6bfca)
#18 0x0000560a6cb3c006 llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::CGSCCToFunctionPassAdaptor, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0xb57006)
#19 0x0000560a6ef4902a llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x2f6402a)
#20 0x0000560a6d78b876 llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x17a6876)
#21 0x0000560a6ef4dbad llvm::DevirtSCCRepeatedPass::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x2f68bad)
#22 0x0000560a6d78b8c6 llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::DevirtSCCRepeatedPass, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x17a68c6)
#23 0x0000560a6ef4c018 llvm::ModuleToPostOrderCGSCCPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x2f67018)
#24 0x0000560a6d78b826 llvm::detail::PassModel<llvm::Module, llvm::ModuleToPostOrderCGSCCPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x17a6826)
#25 0x0000560a6f93d2d1 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x39582d1)
#26 0x0000560a6e124a38 llvm::ModuleInlinerWrapperPass::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x213fa38)
#27 0x0000560a6d78aee6 llvm::detail::PassModel<llvm::Module, llvm::ModuleInlinerWrapperPass, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x17a5ee6)
#28 0x0000560a6f93d2d1 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x39582d1)
#29 0x0000560a6c68f2e9 llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool) (/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/bin/opt+0x6aa2e9)

Revert "[AMDGPU] InstCombine llvm.amdgcn.ds.bpermute with uniform arguments (#129895)" This reverts commit be5149a. It caused build failures in the openmp-offload-amdgpu-runtime buildbot and others.

Reland llvm#129895 with a fix to avoid trying to combine bpermute of bitcast.

[AMDGPU] InstCombine llvm.amdgcn.ds.bpermute with uniform arguments (llvm#129895)

Revert "[AMDGPU] InstCombine llvm.amdgcn.ds.bpermute with uniform arguments (llvm#129895)" This reverts commit be5149a. It caused build failures in the openmp-offload-amdgpu-runtime buildbot and others.

[AMDGPU] InstCombine llvm.amdgcn.ds.bpermute with uniform arguments (#130133) Reland #129895 with a fix to avoid trying to combine bpermute of bitcast.

[AMDGPU] InstCombine llvm.amdgcn.ds.bpermute with uniform arguments (llvm#130133) Reland llvm#129895 with a fix to avoid trying to combine bpermute of bitcast.

[AMDGPU] InstCombine llvm.amdgcn.ds.bpermute with uniform arguments

4ffb583

llvmbot added backend:AMDGPU llvm:instcombine llvm:transforms labels Mar 5, 2025

jayfoad requested review from arsenm, nhaehnle, shiltian and tsymalla March 5, 2025 16:15

arsenm approved these changes Mar 6, 2025

View reviewed changes

Merge remote-tracking branch 'origin/main' into instcombine-ds-bpermute

1856f21

jayfoad merged commit be5149a into llvm:main Mar 6, 2025
6 of 9 checks passed

jayfoad deleted the instcombine-ds-bpermute branch March 6, 2025 14:32

jayfoad added a commit that referenced this pull request Mar 6, 2025

Revert "[AMDGPU] InstCombine llvm.amdgcn.ds.bpermute with uniform arg…

78281fd

…uments (#129895)" This reverts commit be5149a. It caused build failures in the openmp-offload-amdgpu-runtime buildbot and others.

jayfoad mentioned this pull request Mar 6, 2025

[AMDGPU] InstCombine llvm.amdgcn.ds.bpermute with uniform arguments #130133

Merged

jayfoad added a commit to jayfoad/llvm-project that referenced this pull request Mar 6, 2025

[AMDGPU] InstCombine llvm.amdgcn.ds.bpermute with uniform arguments

31ceb2a

Reland llvm#129895 with a fix to avoid trying to combine bpermute of bitcast.

jph-13 pushed a commit to jph-13/llvm-project that referenced this pull request Mar 21, 2025

[AMDGPU] InstCombine llvm.amdgcn.ds.bpermute with uniform arguments (l…

4115f0d

…lvm#129895)

jayfoad added a commit that referenced this pull request Apr 10, 2025

[AMDGPU] InstCombine llvm.amdgcn.ds.bpermute with uniform arguments (#…

e3350a6

…130133) Reland #129895 with a fix to avoid trying to combine bpermute of bitcast.

var-const pushed a commit to ldionne/llvm-project that referenced this pull request Apr 17, 2025

[AMDGPU] InstCombine llvm.amdgcn.ds.bpermute with uniform arguments (l…

07e4e43

…lvm#130133) Reland llvm#129895 with a fix to avoid trying to combine bpermute of bitcast.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU] InstCombine llvm.amdgcn.ds.bpermute with uniform arguments #129895

[AMDGPU] InstCombine llvm.amdgcn.ds.bpermute with uniform arguments #129895

Uh oh!

jayfoad commented Mar 5, 2025

Uh oh!

llvmbot commented Mar 5, 2025 •

edited

Loading

Uh oh!

arsenm Mar 6, 2025

Uh oh!

jayfoad Mar 6, 2025

Uh oh!

Uh oh!

llvm-ci commented Mar 6, 2025

Uh oh!

llvm-ci commented Mar 6, 2025

Uh oh!

llvm-ci commented Mar 6, 2025

Uh oh!

llvm-ci commented Mar 6, 2025

Uh oh!

Uh oh!

[AMDGPU] InstCombine llvm.amdgcn.ds.bpermute with uniform arguments #129895

[AMDGPU] InstCombine llvm.amdgcn.ds.bpermute with uniform arguments #129895

Uh oh!

Conversation

jayfoad commented Mar 5, 2025

Uh oh!

llvmbot commented Mar 5, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

arsenm Mar 6, 2025

Choose a reason for hiding this comment

Uh oh!

jayfoad Mar 6, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented Mar 6, 2025

Uh oh!

llvm-ci commented Mar 6, 2025

Uh oh!

llvm-ci commented Mar 6, 2025

Uh oh!

llvm-ci commented Mar 6, 2025

Uh oh!

Uh oh!

llvmbot commented Mar 5, 2025 •

edited

Loading