Skip to content

Commit 5447017

Browse files
jwanggit86 (Jun Wang)
and
Jun Wang
authored
[AMDGPU] Add inreg support for SGPR arguments (llvm#67182)
Function parameters marked with inreg are supposed to be allocated to SGPRs. However, for compute functions the inreg attribute is currently ignored and such parameters are allocated to VGPRs. This fix modifies CC_AMDGPU_Func in AMDGPUCallingConv.td to assign SGPRs when an input argument is marked inreg. --------- Co-authored-by: Jun Wang <[email protected]>
1 parent c99951d commit 5447017

File tree

7 files changed

+1847
-13
lines changed

7 files changed

+1847
-13
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,11 @@ def CC_AMDGPU_Func : CallingConv<[
187187
CCIfByVal<CCPassByVal<4, 4>>,
188188
CCIfType<[i1], CCPromoteToType<i32>>,
189189
CCIfType<[i8, i16], CCIfExtend<CCPromoteToType<i32>>>,
190+
191+
CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<
192+
!foreach(i, !range(0, 30), !cast<Register>("SGPR"#i)) // SGPR0-29
193+
>>>,
194+
190195
CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1], CCAssignToReg<[
191196
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
192197
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2665,6 +2665,11 @@ SDValue SITargetLowering::LowerFormalArguments(
26652665

26662666
if (!IsKernel) {
26672667
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, isVarArg);
2668+
if (!IsGraphics && !Subtarget->enableFlatScratch()) {
2669+
CCInfo.AllocateRegBlock(ArrayRef<MCPhysReg>{AMDGPU::SGPR0, AMDGPU::SGPR1,
2670+
AMDGPU::SGPR2, AMDGPU::SGPR3},
2671+
4);
2672+
}
26682673
CCInfo.AnalyzeFormalArguments(Splits, AssignFn);
26692674
}
26702675

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2529,8 +2529,8 @@ bool isArgPassedInSGPR(const Argument *A) {
25292529
return A->hasAttribute(Attribute::InReg) ||
25302530
A->hasAttribute(Attribute::ByVal);
25312531
default:
2532-
// TODO: Should calls support inreg for SGPR inputs?
2533-
return false;
2532+
// TODO: treat i1 as divergent?
2533+
return A->hasAttribute(Attribute::InReg);
25342534
}
25352535
}
25362536

@@ -2556,8 +2556,7 @@ bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
25562556
return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
25572557
CB->paramHasAttr(ArgNo, Attribute::ByVal);
25582558
default:
2559-
// TODO: Should calls support inreg for SGPR inputs?
2560-
return false;
2559+
return CB->paramHasAttr(ArgNo, Attribute::InReg);
25612560
}
25622561
}
25632562

llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,15 @@ define i32 @asm_sgpr(i32 %divergent) {
3939
ret i32 %sgpr
4040
}
4141

42+
; SGPR asm outputs are uniform regardless of the input operands.
43+
; Argument not divergent if marked inreg.
44+
; CHECK-LABEL: for function 'asm_sgpr_inreg_arg':
45+
; CHECK-NOT: DIVERGENT
46+
define i32 @asm_sgpr_inreg_arg(i32 inreg %divergent) {
47+
%sgpr = call i32 asm "; def $0, $1","=s,v"(i32 %divergent)
48+
ret i32 %sgpr
49+
}
50+
4251
; CHECK-LABEL: for function 'asm_mixed_sgpr_vgpr':
4352
; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1, $2", "=s,=v,v"(i32 %divergent)
4453
; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 0
@@ -58,6 +67,18 @@ define void @single_lane_func_arguments(i32 %i32, i1 %i1) #2 {
5867
ret void
5968
}
6069

70+
; CHECK-LABEL: for function 'divergent_args':
71+
; CHECK: DIVERGENT ARGUMENTS
72+
define void @divergent_args(i32 %i32, i1 %i1) {
73+
ret void
74+
}
75+
76+
; CHECK-LABEL: for function 'no_divergent_args_if_inreg':
77+
; CHECK-NOT: DIVERGENT
78+
define void @no_divergent_args_if_inreg(i32 inreg %i32, i1 inreg %i1) {
79+
ret void
80+
}
81+
6182
declare i32 @llvm.amdgcn.workitem.id.x() #0
6283
declare i32 @llvm.amdgcn.readfirstlane(i32) #0
6384
declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #1

llvm/test/Analysis/UniformityAnalysis/AMDGPU/kernel-args.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,6 @@ define amdgpu_kernel void @test_amdgpu_kernel(ptr addrspace(4) byref([4 x <16 x
3030
; CHECK: DIVERGENT:
3131
; CHECK: DIVERGENT:
3232
; CHECK: DIVERGENT:
33-
; CHECK: DIVERGENT:
34-
; CHECK: DIVERGENT:
3533
define void @test_c(ptr addrspace(5) byval([4 x <16 x i8>]) %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
3634
ret void
3735
}

0 commit comments

Comments
 (0)