@@ -2562,4 +2562,99 @@ entry:
2562
2562
ret void
2563
2563
}
2564
2564
2565
; Codegen test for a byte load at offset -1 from a null pointer.
; The select condition is the constant false, so %null is always the
; addrspace(5) null pointer cast to a generic pointer and %buffer is never
; used. The expected assembly below performs a 64-bit add of -1 whose high
; dword comes from src_private_base under the GFX9, GFX10 and GFX11 prefixes
; and from an s_load_dword under the GFX8 prefix.
; The check lines look autogenerated (presumably by update_llc_test_checks.py);
; regenerate them rather than editing by hand -- TODO confirm against the
; header of this file, which is outside this view.
+ define amdgpu_kernel void @negativeoffsetnullptr (ptr %buffer ) {
2566
+ ; GFX8-LABEL: negativeoffsetnullptr:
2567
+ ; GFX8: ; %bb.0: ; %entry
2568
+ ; GFX8-NEXT: s_load_dword s1, s[2:3], 0xec
2569
+ ; GFX8-NEXT: s_add_u32 s0, 0, -1
2570
+ ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2571
+ ; GFX8-NEXT: s_addc_u32 s1, s1, -1
2572
+ ; GFX8-NEXT: v_mov_b32_e32 v0, s0
2573
+ ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2574
+ ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
2575
+ ; GFX8-NEXT: s_mov_b64 s[0:1], 0
2576
+ ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2577
+ ; GFX8-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
2578
+ ; GFX8-NEXT: .LBB8_1: ; %branch
2579
+ ; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
2580
+ ; GFX8-NEXT: s_and_b64 s[2:3], exec, vcc
2581
+ ; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
2582
+ ; GFX8-NEXT: s_andn2_b64 exec, exec, s[0:1]
2583
+ ; GFX8-NEXT: s_cbranch_execnz .LBB8_1
2584
+ ; GFX8-NEXT: ; %bb.2: ; %end
2585
+ ; GFX8-NEXT: s_endpgm
2586
+ ;
2587
+ ; GFX9-LABEL: negativeoffsetnullptr:
2588
+ ; GFX9: ; %bb.0: ; %entry
2589
+ ; GFX9-NEXT: s_mov_b64 s[0:1], src_private_base
2590
+ ; GFX9-NEXT: v_mov_b32_e32 v1, s1
2591
+ ; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, -1, 0
2592
+ ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2593
+ ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
2594
+ ; GFX9-NEXT: s_mov_b64 s[0:1], 0
2595
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2596
+ ; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
2597
+ ; GFX9-NEXT: .LBB8_1: ; %branch
2598
+ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
2599
+ ; GFX9-NEXT: s_and_b64 s[2:3], exec, vcc
2600
+ ; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
2601
+ ; GFX9-NEXT: s_andn2_b64 exec, exec, s[0:1]
2602
+ ; GFX9-NEXT: s_cbranch_execnz .LBB8_1
2603
+ ; GFX9-NEXT: ; %bb.2: ; %end
2604
+ ; GFX9-NEXT: s_endpgm
2605
+ ;
2606
+ ; GFX10-LABEL: negativeoffsetnullptr:
2607
+ ; GFX10: ; %bb.0: ; %entry
2608
+ ; GFX10-NEXT: s_mov_b64 s[0:1], src_private_base
2609
+ ; GFX10-NEXT: s_add_u32 s0, 0, -1
2610
+ ; GFX10-NEXT: s_addc_u32 s1, s1, -1
2611
+ ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2612
+ ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2613
+ ; GFX10-NEXT: s_mov_b32 s0, 0
2614
+ ; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
2615
+ ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2616
+ ; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v0
2617
+ ; GFX10-NEXT: .LBB8_1: ; %branch
2618
+ ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
2619
+ ; GFX10-NEXT: s_and_b32 s1, exec_lo, vcc_lo
2620
+ ; GFX10-NEXT: s_or_b32 s0, s1, s0
2621
+ ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
2622
+ ; GFX10-NEXT: s_cbranch_execnz .LBB8_1
2623
+ ; GFX10-NEXT: ; %bb.2: ; %end
2624
+ ; GFX10-NEXT: s_endpgm
2625
+ ;
2626
+ ; GFX11-LABEL: negativeoffsetnullptr:
2627
+ ; GFX11: ; %bb.0: ; %entry
2628
+ ; GFX11-NEXT: s_mov_b64 s[0:1], src_private_base
2629
+ ; GFX11-NEXT: v_add_co_u32 v0, s0, -1, 0
2630
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2631
+ ; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
2632
+ ; GFX11-NEXT: s_mov_b32 s0, 0
2633
+ ; GFX11-NEXT: flat_load_u8 v0, v[0:1]
2634
+ ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2635
+ ; GFX11-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v0
2636
+ ; GFX11-NEXT: .LBB8_1: ; %branch
2637
+ ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
2638
+ ; GFX11-NEXT: s_and_b32 s1, exec_lo, vcc_lo
2639
+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2640
+ ; GFX11-NEXT: s_or_b32 s0, s1, s0
2641
+ ; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0
2642
+ ; GFX11-NEXT: s_cbranch_execnz .LBB8_1
2643
+ ; GFX11-NEXT: ; %bb.2: ; %end
2644
+ ; GFX11-NEXT: s_endpgm
2645
+ entry:
2646
+ %null = select i1 false , ptr %buffer , ptr addrspacecast (ptr addrspace (5 ) null to ptr ) ; constant-false condition, so this always yields the cast null pointer
2647
+ %gep = getelementptr i8 , ptr %null , i64 -1 ; address one byte below that pointer
2648
+ %ld = load i8 , ptr %gep ; the single memory access this test is about
2649
+ %cmp = icmp eq i8 %ld , 0 ; evaluated once here, so it is invariant inside the loop below
2650
+ br label %branch
2651
+
2652
+ branch:
2653
+ br i1 %cmp , label %end , label %branch ; self-loop that only exits when %cmp is true
2654
+
2655
+ end:
2656
+ ret void
2657
+ }
2658
+
2659
+
2565
2660
; Attribute group #0 combines nounwind, a list of "amdgpu-no-*" string attributes, target-cpu fiji and uniform-work-group-size false. The functions that reference #0 are outside this view.
attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu" ="fiji" "uniform-work-group-size" ="false" }
0 commit comments