Skip to content

Commit ff1c112

Browse files
shiltian, arsenm
authored and committed
AMDGPU/clang: Add global_load_lds size check support for gfx950
Co-authored-by: Shilei Tian <[email protected]>
1 parent 5a11a72 commit ff1c112

File tree

6 files changed

+82
-13
lines changed

6 files changed

+82
-13
lines changed

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12787,5 +12787,5 @@ def err_acc_loop_not_monotonic
1278712787

1278812788
// AMDGCN builtins diagnostics
1278912789
def err_amdgcn_global_load_lds_size_invalid_value : Error<"invalid size value">;
12790-
def note_amdgcn_global_load_lds_size_valid_value : Note<"size must be 1, 2, or 4">;
12790+
def note_amdgcn_global_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">;
1279112791
} // end of sema component.

clang/lib/Sema/SemaAMDGPU.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,14 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
2626
CallExpr *TheCall) {
2727
// position of memory order and scope arguments in the builtin
2828
unsigned OrderIndex, ScopeIndex;
29+
30+
const auto *FD = SemaRef.getCurFunctionDecl();
31+
assert(FD && "AMDGPU builtins should not be used outside of a function");
32+
llvm::StringMap<bool> CallerFeatureMap;
33+
getASTContext().getFunctionFeatureMap(CallerFeatureMap, FD);
34+
bool HasGFX950Insts =
35+
Builtin::evaluateRequiredTargetFeatures("gfx950-insts", CallerFeatureMap);
36+
2937
switch (BuiltinID) {
3038
case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
3139
constexpr const int SizeIdx = 2;
@@ -39,13 +47,19 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
3947
case 2:
4048
case 4:
4149
return false;
50+
case 12:
51+
case 16: {
52+
if (HasGFX950Insts)
53+
return false;
54+
[[fallthrough]];
55+
}
4256
default:
4357
Diag(ArgExpr->getExprLoc(),
4458
diag::err_amdgcn_global_load_lds_size_invalid_value)
4559
<< ArgExpr->getSourceRange();
4660
Diag(ArgExpr->getExprLoc(),
4761
diag::note_amdgcn_global_load_lds_size_valid_value)
48-
<< ArgExpr->getSourceRange();
62+
<< HasGFX950Insts << ArgExpr->getSourceRange();
4963
return true;
5064
}
5165
}

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1765,3 +1765,33 @@ void test_cvt_sr_f16_f32(global half2 *out, float src, uint seed)
17651765
*out = __builtin_amdgcn_cvt_sr_f16_f32(*out, src, seed, 0);
17661766
*out = __builtin_amdgcn_cvt_sr_f16_f32(*out, src, seed, 1);
17671767
}
1768+
1769+
// CHECK-LABEL: @test_global_load_lds_96(
1770+
// CHECK-NEXT: entry:
1771+
// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
1772+
// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
1773+
// CHECK-NEXT: store ptr addrspace(1) [[SRC:%.*]], ptr addrspace(5) [[SRC_ADDR]], align 8
1774+
// CHECK-NEXT: store ptr addrspace(3) [[DST:%.*]], ptr addrspace(5) [[DST_ADDR]], align 4
1775+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[SRC_ADDR]], align 8
1776+
// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(3), ptr addrspace(5) [[DST_ADDR]], align 4
1777+
// CHECK-NEXT: call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) [[TMP0]], ptr addrspace(3) [[TMP1]], i32 12, i32 0, i32 0)
1778+
// CHECK-NEXT: ret void
1779+
//
1780+
void test_global_load_lds_96(global void* src, local void *dst) {
1781+
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/12, /*offset=*/0, /*aux=*/0);
1782+
}
1783+
1784+
// CHECK-LABEL: @test_global_load_lds_128(
1785+
// CHECK-NEXT: entry:
1786+
// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
1787+
// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
1788+
// CHECK-NEXT: store ptr addrspace(1) [[SRC:%.*]], ptr addrspace(5) [[SRC_ADDR]], align 8
1789+
// CHECK-NEXT: store ptr addrspace(3) [[DST:%.*]], ptr addrspace(5) [[DST_ADDR]], align 4
1790+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[SRC_ADDR]], align 8
1791+
// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(3), ptr addrspace(5) [[DST_ADDR]], align 4
1792+
// CHECK-NEXT: call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) [[TMP0]], ptr addrspace(3) [[TMP1]], i32 16, i32 0, i32 0)
1793+
// CHECK-NEXT: ret void
1794+
//
1795+
void test_global_load_lds_128(global void* src, local void *dst) {
1796+
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0);
1797+
}

clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// RUN: %clang_cc1 -triple amdgcn-- -verify -S -o - %s
2-
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -verify -S -o - %s
3-
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -target-feature +wavefrontsize64 -verify -S -o - %s
4-
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature +wavefrontsize64 -verify -S -o - %s
2+
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -verify=expected,gfx9 -S -o - %s
3+
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -target-feature +wavefrontsize64 -verify=expected,wavefront64 -S -o - %s
4+
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature +wavefrontsize64 -verify=expected,wavefront64 -S -o - %s
55
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature -wavefrontsize32 -verify -S -o - %s
66

77
// REQUIRES: amdgpu-registered-target
@@ -14,5 +14,5 @@ void test_ballot_wave32(global uint* out, int a, int b) {
1414

1515
__attribute__((target("wavefrontsize32"))) // gfx9-error@*:* {{option 'wavefrontsize32' cannot be specified on this target}}
1616
void test_ballot_wave32_target_attr(global uint* out, int a, int b) {
17-
*out = __builtin_amdgcn_ballot_w32(a == b);
17+
*out = __builtin_amdgcn_ballot_w32(a == b); // wavefront64-error@*:* {{invalid feature combination: 'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive}}
1818
}
Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx940 -S -verify -o - %s
1+
// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx940 -S -verify=gfx940,expected -o - %s
2+
// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx950 -S -verify=gfx950,expected -o - %s
23
// REQUIRES: amdgpu-registered-target
34

45
typedef unsigned int u32;
@@ -7,10 +8,16 @@ void test_global_load_lds_unsupported_size(global u32* src, local u32 *dst, u32
78
__builtin_amdgcn_global_load_lds(src, dst, size, /*offset=*/0, /*aux=*/0); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}}
89
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/4, offset, /*aux=*/0); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}}
910
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/4, /*offset=*/0, aux); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}}
10-
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/5, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
11-
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/0, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
12-
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/3, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
13-
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/12, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
14-
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
15-
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/-1, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
11+
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/5, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
12+
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/0, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
13+
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/3, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
14+
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/12, /*offset=*/0, /*aux=*/0); // gfx940-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}}
15+
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0); // gfx940-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}}
16+
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/-1, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
17+
}
18+
19+
__attribute__((target("gfx950-insts")))
20+
void test_global_load_lds_via_target_feature(global u32* src, local u32 *dst, u32 size, u32 offset, u32 aux) {
21+
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/12, /*offset=*/0, /*aux=*/0);
22+
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0);
1623
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// RUN: %clang_cc1 -triple amdgcn-- -verify=default -S -o - %s
2+
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -verify=gfx9 -S -o - %s
3+
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -verify=gfx10 -S -o - %s
4+
// RUN: not %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -target-feature -wavefrontsize32 -S -o - %s 2>&1 | FileCheck --check-prefix=GFX9 %s
5+
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature -wavefrontsize32 -verify=gfx10 -S -o - %s
6+
7+
// REQUIRES: amdgpu-registered-target
8+
9+
// default-no-diagnostics
10+
// gfx10-no-diagnostics
11+
12+
typedef unsigned int uint;
13+
14+
// GFX9: error: option 'wavefrontsize32' cannot be specified on this target
15+
__attribute__((target("wavefrontsize32"))) // gfx9-error@*:* {{option 'wavefrontsize32' cannot be specified on this target}}
16+
void test_ballot_wave32_target_attr(global uint* out, int a, int b) {
17+
*out = __builtin_amdgcn_ballot_w32(a == b);
18+
}

0 commit comments

Comments
 (0)