Skip to content

Commit b8025d1

Browse files
authored
Reapply "[AMDGPU] Add InstCombine rule for ballot.i64 intrinsic in wave32 mode." (#80303)
Reapply #71556 with an added lit test constraint, `REQUIRES: amdgpu-registered-target`, so that the Clang test only runs when the AMDGPU backend is built. This reverts commit 9791e54.
1 parent 319f4c0 commit b8025d1

File tree

4 files changed

+21
-9
lines changed

4 files changed

+21
-9
lines changed

clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
// REQUIRES: amdgpu-registered-target
12
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -D__AMDGCN_WAVEFRONT_SIZE=32 -target-feature +wavefrontsize32 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
23
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
34
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -target-feature +wavefrontsize32 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
@@ -24,23 +25,19 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b)
2425
}
2526

2627
// CHECK-LABEL: @test_read_exec(
27-
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
28+
// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
2829
void test_read_exec(global uint* out) {
2930
*out = __builtin_amdgcn_read_exec();
3031
}
3132

32-
// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
33-
3433
// CHECK-LABEL: @test_read_exec_lo(
3534
// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
3635
void test_read_exec_lo(global uint* out) {
3736
*out = __builtin_amdgcn_read_exec_lo();
3837
}
3938

4039
// CHECK-LABEL: @test_read_exec_hi(
41-
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
42-
// CHECK: lshr i64 [[A:%.*]], 32
43-
// CHECK: trunc i64 [[B:%.*]] to i32
40+
// CHECK: store i32 0, ptr addrspace(1) %out
4441
void test_read_exec_hi(global uint* out) {
4542
*out = __builtin_amdgcn_read_exec_hi();
4643
}

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2391,7 +2391,7 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
23912391
auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
23922392
if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
23932393
isNullConstant(Cond->getOperand(1)) &&
2394-
// TODO: make condition below an assert after fixing ballot bitwidth.
2394+
// ballot.i64 can still reach instruction selection in wave32 mode at -O0, so the width check below must stay a runtime condition rather than an assert.
23952395
VCMP.getValueType().getSizeInBits() == ST->getWavefrontSize()) {
23962396
// %VCMP = i(WaveSize) AMDGPUISD::SETCC ...
23972397
// %C = i1 ISD::SETCC %VCMP, 0, setne/seteq

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -990,6 +990,19 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
990990
return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
991991
}
992992
}
993+
if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) {
994+
// %b64 = call i64 ballot.i64(...)
995+
// =>
996+
// %b32 = call i32 ballot.i32(...)
997+
// %b64 = zext i32 %b32 to i64
998+
Value *Call = IC.Builder.CreateZExt(
999+
IC.Builder.CreateIntrinsic(Intrinsic::amdgcn_ballot,
1000+
{IC.Builder.getInt32Ty()},
1001+
{II.getArgOperand(0)}),
1002+
II.getType());
1003+
Call->takeName(&II);
1004+
return IC.replaceInstUsesWith(II, Call);
1005+
}
9931006
break;
9941007
}
9951008
case Intrinsic::amdgcn_wqm_vote: {

llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2599,7 +2599,8 @@ declare i32 @llvm.amdgcn.ballot.i32(i1) nounwind readnone convergent
25992599

26002600
define i64 @ballot_nocombine_64(i1 %i) {
26012601
; CHECK-LABEL: @ballot_nocombine_64(
2602-
; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[I:%.*]])
2602+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[I:%.*]])
2603+
; CHECK-NEXT: [[B:%.*]] = zext i32 [[TMP1]] to i64
26032604
; CHECK-NEXT: ret i64 [[B]]
26042605
;
26052606
%b = call i64 @llvm.amdgcn.ballot.i64(i1 %i)
@@ -2616,7 +2617,8 @@ define i64 @ballot_zero_64() {
26162617

26172618
define i64 @ballot_one_64() {
26182619
; CHECK-LABEL: @ballot_one_64(
2619-
; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
2620+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 true)
2621+
; CHECK-NEXT: [[B:%.*]] = zext i32 [[TMP1]] to i64
26202622
; CHECK-NEXT: ret i64 [[B]]
26212623
;
26222624
%b = call i64 @llvm.amdgcn.ballot.i64(i1 1)

0 commit comments

Comments
 (0)