Skip to content

Commit 32073b8

Browse files
authored
AMDGPU: Do not generate non-temporal hint when Load_Tr intrinsic did not specify it (llvm#79104)
int_amdgcn_global_load_tr does not specify a non-temporal load transpose, so we should not generate the non-temporal hint for the load. We need to implement getTgtMemIntrinsic to create the corresponding MemSDNode, and we do not set the non-temporal flag because the intrinsic does not specify it. NOTE: getTgtMemIntrinsic needs to be implemented for every memory intrinsic.
1 parent 55a7bb0 commit 32073b8

File tree

3 files changed

+25
-32
lines changed

3 files changed

+25
-32
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1348,6 +1348,14 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
13481348
MachineMemOperand::MOVolatile;
13491349
return true;
13501350
}
1351+
case Intrinsic::amdgcn_global_load_tr: {
1352+
Info.opc = ISD::INTRINSIC_W_CHAIN;
1353+
Info.memVT = MVT::getVT(CI.getType());
1354+
Info.ptrVal = CI.getOperand(0);
1355+
Info.align.reset();
1356+
Info.flags |= MachineMemOperand::MOLoad;
1357+
return true;
1358+
}
13511359
case Intrinsic::amdgcn_ds_gws_init:
13521360
case Intrinsic::amdgcn_ds_gws_barrier:
13531361
case Intrinsic::amdgcn_ds_gws_sema_v:
@@ -1407,6 +1415,7 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
14071415
SmallVectorImpl<Value*> &Ops,
14081416
Type *&AccessTy) const {
14091417
switch (II->getIntrinsicID()) {
1418+
case Intrinsic::amdgcn_global_load_tr:
14101419
case Intrinsic::amdgcn_ds_ordered_add:
14111420
case Intrinsic::amdgcn_ds_ordered_swap:
14121421
case Intrinsic::amdgcn_ds_append:

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w32.ll

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,8 @@ define amdgpu_kernel void @global_load_tr_b64(ptr addrspace(1) %addr, ptr addrsp
1313
; GFX12-SDAG-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
1414
; GFX12-SDAG-W32-NEXT: v_mov_b32_e32 v2, 0
1515
; GFX12-SDAG-W32-NEXT: s_wait_kmcnt 0x0
16-
; GFX12-SDAG-W32-NEXT: global_load_tr_b64 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
16+
; GFX12-SDAG-W32-NEXT: global_load_tr_b64 v[0:1], v2, s[0:1] offset:32
1717
; GFX12-SDAG-W32-NEXT: s_wait_loadcnt 0x0
18-
; GFX12-SDAG-W32-NEXT: global_inv scope:SCOPE_SYS
1918
; GFX12-SDAG-W32-NEXT: global_store_b64 v2, v[0:1], s[2:3]
2019
; GFX12-SDAG-W32-NEXT: s_nop 0
2120
; GFX12-SDAG-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -26,9 +25,8 @@ define amdgpu_kernel void @global_load_tr_b64(ptr addrspace(1) %addr, ptr addrsp
2625
; GFX12-GISEL-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
2726
; GFX12-GISEL-W32-NEXT: v_mov_b32_e32 v2, 0
2827
; GFX12-GISEL-W32-NEXT: s_wait_kmcnt 0x0
29-
; GFX12-GISEL-W32-NEXT: global_load_tr_b64 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
28+
; GFX12-GISEL-W32-NEXT: global_load_tr_b64 v[0:1], v2, s[0:1] offset:32
3029
; GFX12-GISEL-W32-NEXT: s_wait_loadcnt 0x0
31-
; GFX12-GISEL-W32-NEXT: global_inv scope:SCOPE_SYS
3230
; GFX12-GISEL-W32-NEXT: global_store_b64 v2, v[0:1], s[2:3]
3331
; GFX12-GISEL-W32-NEXT: s_nop 0
3432
; GFX12-GISEL-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -46,9 +44,8 @@ define amdgpu_kernel void @global_load_tr_b128_i16(ptr addrspace(1) %addr, ptr a
4644
; GFX12-SDAG-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
4745
; GFX12-SDAG-W32-NEXT: v_mov_b32_e32 v4, 0
4846
; GFX12-SDAG-W32-NEXT: s_wait_kmcnt 0x0
49-
; GFX12-SDAG-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
47+
; GFX12-SDAG-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
5048
; GFX12-SDAG-W32-NEXT: s_wait_loadcnt 0x0
51-
; GFX12-SDAG-W32-NEXT: global_inv scope:SCOPE_SYS
5249
; GFX12-SDAG-W32-NEXT: global_store_b128 v4, v[0:3], s[2:3]
5350
; GFX12-SDAG-W32-NEXT: s_nop 0
5451
; GFX12-SDAG-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -59,9 +56,8 @@ define amdgpu_kernel void @global_load_tr_b128_i16(ptr addrspace(1) %addr, ptr a
5956
; GFX12-GISEL-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
6057
; GFX12-GISEL-W32-NEXT: v_mov_b32_e32 v4, 0
6158
; GFX12-GISEL-W32-NEXT: s_wait_kmcnt 0x0
62-
; GFX12-GISEL-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
59+
; GFX12-GISEL-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
6360
; GFX12-GISEL-W32-NEXT: s_wait_loadcnt 0x0
64-
; GFX12-GISEL-W32-NEXT: global_inv scope:SCOPE_SYS
6561
; GFX12-GISEL-W32-NEXT: global_store_b128 v4, v[0:3], s[2:3]
6662
; GFX12-GISEL-W32-NEXT: s_nop 0
6763
; GFX12-GISEL-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -79,9 +75,8 @@ define amdgpu_kernel void @global_load_tr_b128_half(ptr addrspace(1) %addr, ptr
7975
; GFX12-SDAG-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
8076
; GFX12-SDAG-W32-NEXT: v_mov_b32_e32 v4, 0
8177
; GFX12-SDAG-W32-NEXT: s_wait_kmcnt 0x0
82-
; GFX12-SDAG-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
78+
; GFX12-SDAG-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
8379
; GFX12-SDAG-W32-NEXT: s_wait_loadcnt 0x0
84-
; GFX12-SDAG-W32-NEXT: global_inv scope:SCOPE_SYS
8580
; GFX12-SDAG-W32-NEXT: global_store_b128 v4, v[0:3], s[2:3]
8681
; GFX12-SDAG-W32-NEXT: s_nop 0
8782
; GFX12-SDAG-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -92,9 +87,8 @@ define amdgpu_kernel void @global_load_tr_b128_half(ptr addrspace(1) %addr, ptr
9287
; GFX12-GISEL-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
9388
; GFX12-GISEL-W32-NEXT: v_mov_b32_e32 v4, 0
9489
; GFX12-GISEL-W32-NEXT: s_wait_kmcnt 0x0
95-
; GFX12-GISEL-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
90+
; GFX12-GISEL-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
9691
; GFX12-GISEL-W32-NEXT: s_wait_loadcnt 0x0
97-
; GFX12-GISEL-W32-NEXT: global_inv scope:SCOPE_SYS
9892
; GFX12-GISEL-W32-NEXT: global_store_b128 v4, v[0:3], s[2:3]
9993
; GFX12-GISEL-W32-NEXT: s_nop 0
10094
; GFX12-GISEL-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -112,9 +106,8 @@ define amdgpu_kernel void @global_load_tr_b128_bfloat(ptr addrspace(1) %addr, pt
112106
; GFX12-SDAG-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
113107
; GFX12-SDAG-W32-NEXT: v_mov_b32_e32 v4, 0
114108
; GFX12-SDAG-W32-NEXT: s_wait_kmcnt 0x0
115-
; GFX12-SDAG-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
109+
; GFX12-SDAG-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
116110
; GFX12-SDAG-W32-NEXT: s_wait_loadcnt 0x0
117-
; GFX12-SDAG-W32-NEXT: global_inv scope:SCOPE_SYS
118111
; GFX12-SDAG-W32-NEXT: global_store_b128 v4, v[0:3], s[2:3]
119112
; GFX12-SDAG-W32-NEXT: s_nop 0
120113
; GFX12-SDAG-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -125,9 +118,8 @@ define amdgpu_kernel void @global_load_tr_b128_bfloat(ptr addrspace(1) %addr, pt
125118
; GFX12-GISEL-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
126119
; GFX12-GISEL-W32-NEXT: v_mov_b32_e32 v4, 0
127120
; GFX12-GISEL-W32-NEXT: s_wait_kmcnt 0x0
128-
; GFX12-GISEL-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
121+
; GFX12-GISEL-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
129122
; GFX12-GISEL-W32-NEXT: s_wait_loadcnt 0x0
130-
; GFX12-GISEL-W32-NEXT: global_inv scope:SCOPE_SYS
131123
; GFX12-GISEL-W32-NEXT: global_store_b128 v4, v[0:3], s[2:3]
132124
; GFX12-GISEL-W32-NEXT: s_nop 0
133125
; GFX12-GISEL-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w64.ll

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,8 @@ define amdgpu_kernel void @global_load_tr_b64(ptr addrspace(1) %addr, ptr addrsp
1313
; GFX12-SDAG-W64-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
1414
; GFX12-SDAG-W64-NEXT: v_mov_b32_e32 v0, 0
1515
; GFX12-SDAG-W64-NEXT: s_wait_kmcnt 0x0
16-
; GFX12-SDAG-W64-NEXT: global_load_tr_b64 v1, v0, s[0:1] offset:32 th:TH_LOAD_NT
16+
; GFX12-SDAG-W64-NEXT: global_load_tr_b64 v1, v0, s[0:1] offset:32
1717
; GFX12-SDAG-W64-NEXT: s_wait_loadcnt 0x0
18-
; GFX12-SDAG-W64-NEXT: global_inv scope:SCOPE_SYS
1918
; GFX12-SDAG-W64-NEXT: global_store_b32 v0, v1, s[2:3]
2019
; GFX12-SDAG-W64-NEXT: s_nop 0
2120
; GFX12-SDAG-W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -26,9 +25,8 @@ define amdgpu_kernel void @global_load_tr_b64(ptr addrspace(1) %addr, ptr addrsp
2625
; GFX12-GISEL-W64-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
2726
; GFX12-GISEL-W64-NEXT: v_mov_b32_e32 v0, 0
2827
; GFX12-GISEL-W64-NEXT: s_wait_kmcnt 0x0
29-
; GFX12-GISEL-W64-NEXT: global_load_tr_b64 v1, v0, s[0:1] offset:32 th:TH_LOAD_NT
28+
; GFX12-GISEL-W64-NEXT: global_load_tr_b64 v1, v0, s[0:1] offset:32
3029
; GFX12-GISEL-W64-NEXT: s_wait_loadcnt 0x0
31-
; GFX12-GISEL-W64-NEXT: global_inv scope:SCOPE_SYS
3230
; GFX12-GISEL-W64-NEXT: global_store_b32 v0, v1, s[2:3]
3331
; GFX12-GISEL-W64-NEXT: s_nop 0
3432
; GFX12-GISEL-W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -46,9 +44,8 @@ define amdgpu_kernel void @global_load_tr_b128_i16(ptr addrspace(1) %addr, ptr a
4644
; GFX12-SDAG-W64-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
4745
; GFX12-SDAG-W64-NEXT: v_mov_b32_e32 v2, 0
4846
; GFX12-SDAG-W64-NEXT: s_wait_kmcnt 0x0
49-
; GFX12-SDAG-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
47+
; GFX12-SDAG-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32
5048
; GFX12-SDAG-W64-NEXT: s_wait_loadcnt 0x0
51-
; GFX12-SDAG-W64-NEXT: global_inv scope:SCOPE_SYS
5249
; GFX12-SDAG-W64-NEXT: global_store_b64 v2, v[0:1], s[2:3]
5350
; GFX12-SDAG-W64-NEXT: s_nop 0
5451
; GFX12-SDAG-W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -59,9 +56,8 @@ define amdgpu_kernel void @global_load_tr_b128_i16(ptr addrspace(1) %addr, ptr a
5956
; GFX12-GISEL-W64-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
6057
; GFX12-GISEL-W64-NEXT: v_mov_b32_e32 v2, 0
6158
; GFX12-GISEL-W64-NEXT: s_wait_kmcnt 0x0
62-
; GFX12-GISEL-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
59+
; GFX12-GISEL-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32
6360
; GFX12-GISEL-W64-NEXT: s_wait_loadcnt 0x0
64-
; GFX12-GISEL-W64-NEXT: global_inv scope:SCOPE_SYS
6561
; GFX12-GISEL-W64-NEXT: global_store_b64 v2, v[0:1], s[2:3]
6662
; GFX12-GISEL-W64-NEXT: s_nop 0
6763
; GFX12-GISEL-W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -79,9 +75,8 @@ define amdgpu_kernel void @global_load_tr_b128_half(ptr addrspace(1) %addr, ptr
7975
; GFX12-SDAG-W64-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
8076
; GFX12-SDAG-W64-NEXT: v_mov_b32_e32 v2, 0
8177
; GFX12-SDAG-W64-NEXT: s_wait_kmcnt 0x0
82-
; GFX12-SDAG-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
78+
; GFX12-SDAG-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32
8379
; GFX12-SDAG-W64-NEXT: s_wait_loadcnt 0x0
84-
; GFX12-SDAG-W64-NEXT: global_inv scope:SCOPE_SYS
8580
; GFX12-SDAG-W64-NEXT: global_store_b64 v2, v[0:1], s[2:3]
8681
; GFX12-SDAG-W64-NEXT: s_nop 0
8782
; GFX12-SDAG-W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -92,9 +87,8 @@ define amdgpu_kernel void @global_load_tr_b128_half(ptr addrspace(1) %addr, ptr
9287
; GFX12-GISEL-W64-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
9388
; GFX12-GISEL-W64-NEXT: v_mov_b32_e32 v2, 0
9489
; GFX12-GISEL-W64-NEXT: s_wait_kmcnt 0x0
95-
; GFX12-GISEL-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
90+
; GFX12-GISEL-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32
9691
; GFX12-GISEL-W64-NEXT: s_wait_loadcnt 0x0
97-
; GFX12-GISEL-W64-NEXT: global_inv scope:SCOPE_SYS
9892
; GFX12-GISEL-W64-NEXT: global_store_b64 v2, v[0:1], s[2:3]
9993
; GFX12-GISEL-W64-NEXT: s_nop 0
10094
; GFX12-GISEL-W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -112,9 +106,8 @@ define amdgpu_kernel void @global_load_tr_b128_bfloat(ptr addrspace(1) %addr, pt
112106
; GFX12-SDAG-W64-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
113107
; GFX12-SDAG-W64-NEXT: v_mov_b32_e32 v2, 0
114108
; GFX12-SDAG-W64-NEXT: s_wait_kmcnt 0x0
115-
; GFX12-SDAG-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
109+
; GFX12-SDAG-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32
116110
; GFX12-SDAG-W64-NEXT: s_wait_loadcnt 0x0
117-
; GFX12-SDAG-W64-NEXT: global_inv scope:SCOPE_SYS
118111
; GFX12-SDAG-W64-NEXT: global_store_b64 v2, v[0:1], s[2:3]
119112
; GFX12-SDAG-W64-NEXT: s_nop 0
120113
; GFX12-SDAG-W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -125,9 +118,8 @@ define amdgpu_kernel void @global_load_tr_b128_bfloat(ptr addrspace(1) %addr, pt
125118
; GFX12-GISEL-W64-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
126119
; GFX12-GISEL-W64-NEXT: v_mov_b32_e32 v2, 0
127120
; GFX12-GISEL-W64-NEXT: s_wait_kmcnt 0x0
128-
; GFX12-GISEL-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
121+
; GFX12-GISEL-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32
129122
; GFX12-GISEL-W64-NEXT: s_wait_loadcnt 0x0
130-
; GFX12-GISEL-W64-NEXT: global_inv scope:SCOPE_SYS
131123
; GFX12-GISEL-W64-NEXT: global_store_b64 v2, v[0:1], s[2:3]
132124
; GFX12-GISEL-W64-NEXT: s_nop 0
133125
; GFX12-GISEL-W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)

0 commit comments

Comments
 (0)