Skip to content

Commit b705154

Browse files
committed
[AMDGPU] add missing checks in processBaseWithConstOffset
1 parent fc66eaa commit b705154

File tree

2 files changed

+101
-0
lines changed

2 files changed

+101
-0
lines changed

llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2117,6 +2117,9 @@ void SILoadStoreOptimizer::processBaseWithConstOffset(const MachineOperand &Base
21172117
BaseLo = *Src0;
21182118
}
21192119

2120+
if (!BaseLo.isReg())
2121+
return;
2122+
21202123
Src0 = TII->getNamedOperand(*BaseHiDef, AMDGPU::OpName::src0);
21212124
Src1 = TII->getNamedOperand(*BaseHiDef, AMDGPU::OpName::src1);
21222125

@@ -2129,6 +2132,9 @@ void SILoadStoreOptimizer::processBaseWithConstOffset(const MachineOperand &Base
21292132
uint64_t Offset1 = Src1->getImm();
21302133
BaseHi = *Src0;
21312134

2135+
if (!BaseHi.isReg())
2136+
return;
2137+
21322138
Addr.Base.LoReg = BaseLo.getReg();
21332139
Addr.Base.HiReg = BaseHi.getReg();
21342140
Addr.Base.LoSubReg = BaseLo.getSubReg();

llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2562,4 +2562,99 @@ entry:
25622562
ret void
25632563
}
25642564

2565+
; Regression test: the loaded address is a constant offset (-1) from a null
; private (addrspace 5) pointer that has been cast to a flat pointer; the
; select on "i1 false" always picks that null pointer, so %buffer is unused.
; In the expected output below, the low half of the address is computed from
; the immediate 0 rather than from a register (e.g. "s_add_u32 s0, 0, -1").
; NOTE(review): presumably this immediate base is the non-register operand
; that processBaseWithConstOffset has to bail out on instead of calling
; getReg() - confirm against the pass.
define amdgpu_kernel void @negativeoffsetnullptr(ptr %buffer) {
2566+
; GFX8-LABEL: negativeoffsetnullptr:
2567+
; GFX8: ; %bb.0: ; %entry
2568+
; GFX8-NEXT: s_load_dword s1, s[2:3], 0xec
2569+
; GFX8-NEXT: s_add_u32 s0, 0, -1
2570+
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2571+
; GFX8-NEXT: s_addc_u32 s1, s1, -1
2572+
; GFX8-NEXT: v_mov_b32_e32 v0, s0
2573+
; GFX8-NEXT: v_mov_b32_e32 v1, s1
2574+
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
2575+
; GFX8-NEXT: s_mov_b64 s[0:1], 0
2576+
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2577+
; GFX8-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
2578+
; GFX8-NEXT: .LBB8_1: ; %branch
2579+
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
2580+
; GFX8-NEXT: s_and_b64 s[2:3], exec, vcc
2581+
; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
2582+
; GFX8-NEXT: s_andn2_b64 exec, exec, s[0:1]
2583+
; GFX8-NEXT: s_cbranch_execnz .LBB8_1
2584+
; GFX8-NEXT: ; %bb.2: ; %end
2585+
; GFX8-NEXT: s_endpgm
2586+
;
2587+
; GFX9-LABEL: negativeoffsetnullptr:
2588+
; GFX9: ; %bb.0: ; %entry
2589+
; GFX9-NEXT: s_mov_b64 s[0:1], src_private_base
2590+
; GFX9-NEXT: v_mov_b32_e32 v1, s1
2591+
; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, -1, 0
2592+
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2593+
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
2594+
; GFX9-NEXT: s_mov_b64 s[0:1], 0
2595+
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2596+
; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
2597+
; GFX9-NEXT: .LBB8_1: ; %branch
2598+
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
2599+
; GFX9-NEXT: s_and_b64 s[2:3], exec, vcc
2600+
; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
2601+
; GFX9-NEXT: s_andn2_b64 exec, exec, s[0:1]
2602+
; GFX9-NEXT: s_cbranch_execnz .LBB8_1
2603+
; GFX9-NEXT: ; %bb.2: ; %end
2604+
; GFX9-NEXT: s_endpgm
2605+
;
2606+
; GFX10-LABEL: negativeoffsetnullptr:
2607+
; GFX10: ; %bb.0: ; %entry
2608+
; GFX10-NEXT: s_mov_b64 s[0:1], src_private_base
2609+
; GFX10-NEXT: s_add_u32 s0, 0, -1
2610+
; GFX10-NEXT: s_addc_u32 s1, s1, -1
2611+
; GFX10-NEXT: v_mov_b32_e32 v0, s0
2612+
; GFX10-NEXT: v_mov_b32_e32 v1, s1
2613+
; GFX10-NEXT: s_mov_b32 s0, 0
2614+
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
2615+
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2616+
; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v0
2617+
; GFX10-NEXT: .LBB8_1: ; %branch
2618+
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
2619+
; GFX10-NEXT: s_and_b32 s1, exec_lo, vcc_lo
2620+
; GFX10-NEXT: s_or_b32 s0, s1, s0
2621+
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
2622+
; GFX10-NEXT: s_cbranch_execnz .LBB8_1
2623+
; GFX10-NEXT: ; %bb.2: ; %end
2624+
; GFX10-NEXT: s_endpgm
2625+
;
2626+
; GFX11-LABEL: negativeoffsetnullptr:
2627+
; GFX11: ; %bb.0: ; %entry
2628+
; GFX11-NEXT: s_mov_b64 s[0:1], src_private_base
2629+
; GFX11-NEXT: v_add_co_u32 v0, s0, -1, 0
2630+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2631+
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
2632+
; GFX11-NEXT: s_mov_b32 s0, 0
2633+
; GFX11-NEXT: flat_load_u8 v0, v[0:1]
2634+
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2635+
; GFX11-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v0
2636+
; GFX11-NEXT: .LBB8_1: ; %branch
2637+
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
2638+
; GFX11-NEXT: s_and_b32 s1, exec_lo, vcc_lo
2639+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2640+
; GFX11-NEXT: s_or_b32 s0, s1, s0
2641+
; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0
2642+
; GFX11-NEXT: s_cbranch_execnz .LBB8_1
2643+
; GFX11-NEXT: ; %bb.2: ; %end
2644+
; GFX11-NEXT: s_endpgm
2645+
entry:
2646+
%null = select i1 false, ptr %buffer, ptr addrspacecast (ptr addrspace(5) null to ptr)
2647+
%gep = getelementptr i8, ptr %null, i64 -1
2648+
%ld = load i8, ptr %gep
2649+
%cmp = icmp eq i8 %ld, 0
2650+
br label %branch
2651+
2652+
branch:
2653+
br i1 %cmp, label %end, label %branch
2654+
2655+
end:
2656+
ret void
2657+
}
2658+
2659+
25652660
attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }

0 commit comments

Comments
 (0)