-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[AMDGPU] Use directive for kernarg preload header padding #86004
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Austin Kerbow (kerbowa) ChangesPatch is 406.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/86004.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 4742b0b3e52ecf..697b986bca4995 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -283,6 +283,16 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
return true;
}
+bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader(
+ const MCSubtargetInfo &STI, bool TrapEnabled) {
+ const char *TrapInstr = TrapEnabled ? "\ts_trap 2" : "\ts_endpgm";
+ OS << TrapInstr
+ << " ; Kernarg preload header. Trap with incompatible firmware that "
+ "doesn't support preloading kernel arguments.\n";
+ OS << "\t.fill 63, 4, 0xbf800000 ; s_nop 0\n";
+ return true;
+}
+
bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
const uint32_t Encoded_s_code_end = 0xbf9f0000;
const uint32_t Encoded_s_nop = 0xbf800000;
@@ -781,18 +791,6 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
return true;
}
-bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader(
- const MCSubtargetInfo &STI, bool TrapEnabled) {
- const char *TrapInstr = TrapEnabled ? "\ts_trap 2" : "\ts_endpgm";
- OS << TrapInstr
- << " ; Trap with incompatible firmware that doesn't "
- "support preloading kernel arguments.\n";
- for (int i = 0; i < 63; ++i) {
- OS << "\ts_nop 0\n";
- }
- return true;
-}
-
bool AMDGPUTargetELFStreamer::EmitKernargPreloadHeader(
const MCSubtargetInfo &STI, bool TrapEnabled) {
const uint32_t Encoded_s_nop = 0xbf800000;
diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll b/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll
index a70488a00db739..a030f86da1b67d 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll
@@ -1,17 +1,20 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,HSA %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,HSA %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,NON-HSA %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,HSA,ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,HSA,OBJ %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,NON-HSA,OBJ %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -asm-verbose=0 < %s | llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,HSA,OBJ %s
; GCN: preload_kernarg_header
; HSA: s_trap 2
; NON-HSA: s_endpgm
-; GCN-COUNT-63: s_nop 0
+; ASM: .fill 63, 4, 0xbf800000 ; s_nop 0
+; OBJ-COUNT-63: s_nop 0
define amdgpu_kernel void @preload_kernarg_header(ptr %arg) {
store ptr %arg, ptr %arg
ret void
}
; GCN: non_kernel_function
+; GCN-NOT: s_trap 2
; GCN-NOT: s_nop 0
; GCN: flat_store
define void @non_kernel_function(ptr %arg) {
diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
index d20c3a4007ffdd..f0e709b5a17279 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
@@ -24,70 +24,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
; GFX940-NO-PRELOAD-NEXT: s_endpgm
;
; GFX940-PRELOAD-1-LABEL: ptr1_i8:
-; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX940-PRELOAD-1-NEXT: ; %bb.0:
; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8
; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
@@ -98,70 +36,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
; GFX940-PRELOAD-1-NEXT: s_endpgm
;
; GFX940-PRELOAD-2-LABEL: ptr1_i8:
-; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX940-PRELOAD-2-NEXT: ; %bb.0:
; GFX940-PRELOAD-2-NEXT: s_and_b32 s0, s4, 0xff
; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
@@ -170,70 +46,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
; GFX940-PRELOAD-2-NEXT: s_endpgm
;
; GFX940-PRELOAD-4-LABEL: ptr1_i8:
-; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX940-PRELOAD-4-NEXT: ; %bb.0:
; GFX940-PRELOAD-4-NEXT: s_and_b32 s0, s4, 0xff
; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
@@ -242,70 +56,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
; GFX940-PRELOAD-4-NEXT: s_endpgm
;
; GFX940-PRELOAD-8-LABEL: ptr1_i8:
-; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX940-PRELOAD-8-NEXT: ; %bb.0:
; GFX940-PRELOAD-8-NEXT: s_and_b32 s0, s4, 0xff
; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
@@ -325,70 +77,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
; GFX90a-NO-PRELOAD-NEXT: s_endpgm
;
; GFX90a-PRELOAD-1-LABEL: ptr1_i8:
-; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8
; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
@@ -399,70 +89,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
; GFX90a-PRELOAD-1-NEXT: s_endpgm
;
; GFX90a-PRELOAD-2-LABEL: ptr1_i8:
-; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOA...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm with nit
@@ -283,6 +283,16 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata( | |||
return true; | |||
} | |||
|
|||
bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader( | |||
const MCSubtargetInfo &STI, bool TrapEnabled) { | |||
const char *TrapInstr = TrapEnabled ? "\ts_trap 2" : "\ts_endpgm"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
StringLiteral?
e076ecc
to
639405f
Compare
639405f
to
188b1b8
Compare
(cherry picked from commit b5b34db) Change-Id: Ib6d6a4089f5d6601536eeadd1917cf3121e6f4cc
No description provided.