Skip to content

Commit 49d3cf0

Browse files
gregrodgers authored and ronlieb committed
OPENMP: fix kernel attributes for fixed blocksize
Change-Id: I121f6c8a1cf219c9561318b45272df4072b230c1
1 parent 1d5ff30 commit 49d3cf0

File tree

3 files changed

Lines changed: 12 additions & 14 deletions

3 files changed

Lines changed: 12 additions & 14 deletions

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1097,10 +1097,9 @@ void CGOpenMPRuntimeGPU::GenerateMetaData(CodeGenModule &CGM,
10971097
if (IsGeneric)
10981098
compileTimeThreadLimit =
10991099
ComputeGenericWorkgroupSize(CGM, compileTimeThreadLimit);
1100-
std::string AttrVal = llvm::utostr(compileTimeThreadLimit);
11011100
FlatAttr = compileTimeThreadLimit;
11021101
OutlinedFn->addFnAttr("amdgpu-flat-work-group-size",
1103-
AttrVal + "," + AttrVal);
1102+
"1," + llvm::utostr(compileTimeThreadLimit));
11041103
flatAttrEmitted = true;
11051104
} // end > 0
11061105
} // end of amdgcn teams or parallel directive
@@ -1114,10 +1113,9 @@ void CGOpenMPRuntimeGPU::GenerateMetaData(CodeGenModule &CGM,
11141113
GenericModeWorkgroupSize =
11151114
ComputeGenericWorkgroupSize(CGM, CmdLineWorkGroupSz);
11161115

1117-
std::string FlatAttrVal = llvm::utostr(GenericModeWorkgroupSize);
11181116
FlatAttr = GenericModeWorkgroupSize;
11191117
OutlinedFn->addFnAttr("amdgpu-flat-work-group-size",
1120-
FlatAttrVal + "," + FlatAttrVal);
1118+
"1," + llvm::utostr(GenericModeWorkgroupSize));
11211119
}
11221120
// Emit a kernel descriptor for runtime.
11231121
setPropertyWorkGroupSize(CGM, OutlinedFn->getName(), FlatAttr);

clang/lib/CodeGen/TargetInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9418,7 +9418,7 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
94189418
const bool IsHIPKernel =
94199419
M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>();
94209420

9421-
if (IsHIPKernel)
9421+
if (IsHIPKernel || M.getLangOpts().OpenMPIsDevice)
94229422
F->addFnAttr("uniform-work-group-size", "true");
94239423

94249424
if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())

clang/test/OpenMP/amdgcn-attributes.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,12 @@ int callable(int x) {
3232
return x + 1;
3333
}
3434

35-
// DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="257,257" "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
36-
// CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="257,257" "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" }
37-
// NOIEEE: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="257,257" "amdgpu-ieee"="false" "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
38-
// UNSAFEATOMIC: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="257,257" "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
39-
40-
// DEFAULT: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
41-
// CPU: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" }
42-
// NOIEEE: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "frame-pointer"="none" "min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
43-
// UNSAFEATOMIC: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
35+
// DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,257" "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
36+
// CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,257" "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" }
37+
// NOIEEE: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,257" "amdgpu-ieee"="false" "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
38+
// UNSAFEATOMIC: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,257" "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
39+
40+
// DEFAULT: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
41+
// CPU: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" }
42+
// NOIEEE: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "frame-pointer"="none" "min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
43+
// UNSAFEATOMIC: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }

0 commit comments

Comments
 (0)