|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| 2 | +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=hawaii %s -o - | FileCheck %s |
| 3 | + |
| 4 | +; For all these tests we disable optimizations through function attributes |
| 5 | +; because the code we are exercising here needs phis and we want to keep the |
| 6 | +; IR small. |
| 7 | + |
| 8 | +; This code used to crash in SDISel because f16 was promoted to f32 through |
| 9 | +; a `f32 = vector_extract_elt <1 x f16>, i32 0`, which is illegal. |
| 10 | +; The invalid SDNode and thus, the crash was only exposed by the constant |
| 11 | +; folding. |
| 12 | +define void @phi_vec1half_to_f32_with_const_folding(ptr addrspace(1) %dst) #0 { |
| 13 | +; CHECK-LABEL: phi_vec1half_to_f32_with_const_folding: |
| 14 | +; CHECK: ; %bb.0: ; %entry |
| 15 | +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 16 | +; CHECK-NEXT: s_mov_b32 s4, 0 |
| 17 | +; CHECK-NEXT: v_cvt_f32_f16_e64 v2, s4 |
| 18 | +; CHECK-NEXT: ; %bb.1: ; %bb |
| 19 | +; CHECK-NEXT: v_cvt_f16_f32_e64 v2, v2 |
| 20 | +; CHECK-NEXT: s_mov_b32 s7, 0xf000 |
| 21 | +; CHECK-NEXT: s_mov_b32 s6, 0 |
| 22 | +; CHECK-NEXT: s_mov_b32 s4, s6 |
| 23 | +; CHECK-NEXT: s_mov_b32 s5, s6 |
| 24 | +; CHECK-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 offset:2 |
| 25 | +; CHECK-NEXT: v_cvt_f16_f32_e64 v2, s4 |
| 26 | +; CHECK-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 |
| 27 | +; CHECK-NEXT: s_waitcnt vmcnt(0) |
| 28 | +; CHECK-NEXT: s_setpc_b64 s[30:31] |
| 29 | +entry: |
| 30 | + br label %bb |
| 31 | + |
| 32 | +bb: |
| 33 | + %phi = phi <1 x half> [ zeroinitializer, %entry ] |
| 34 | + %res = shufflevector <1 x half> poison, <1 x half> %phi, <2 x i32> <i32 0, i32 1> |
| 35 | + store <2 x half> %res, ptr addrspace(1) %dst |
| 36 | + ret void |
| 37 | +} |
| 38 | + |
| 39 | +; Same as phi_vec1half_to_f32_with_const_folding but without the folding. |
| 40 | +; This test exercises the same invalid SDNode, but it happened to work by |
| 41 | +; accident before. Here we make sure the fix also work as expected in the |
| 42 | +; non-constant folding case. |
| 43 | +define void @phi_vec1half_to_f32(ptr addrspace(1) %src, ptr addrspace(1) %dst) #0 { |
| 44 | +; CHECK-LABEL: phi_vec1half_to_f32: |
| 45 | +; CHECK: ; %bb.0: ; %entry |
| 46 | +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 47 | +; CHECK-NEXT: s_mov_b32 s7, 0xf000 |
| 48 | +; CHECK-NEXT: s_mov_b32 s6, 0 |
| 49 | +; CHECK-NEXT: s_mov_b32 s4, s6 |
| 50 | +; CHECK-NEXT: s_mov_b32 s5, s6 |
| 51 | +; CHECK-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 |
| 52 | +; CHECK-NEXT: s_waitcnt vmcnt(0) |
| 53 | +; CHECK-NEXT: v_cvt_f32_f16_e64 v0, v0 |
| 54 | +; CHECK-NEXT: ; %bb.1: ; %bb |
| 55 | +; CHECK-NEXT: v_cvt_f16_f32_e64 v0, v0 |
| 56 | +; CHECK-NEXT: s_mov_b32 s7, 0xf000 |
| 57 | +; CHECK-NEXT: s_mov_b32 s6, 0 |
| 58 | +; CHECK-NEXT: s_mov_b32 s4, s6 |
| 59 | +; CHECK-NEXT: s_mov_b32 s5, s6 |
| 60 | +; CHECK-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 offset:2 |
| 61 | +; CHECK-NEXT: v_cvt_f16_f32_e64 v0, s4 |
| 62 | +; CHECK-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| 63 | +; CHECK-NEXT: s_waitcnt vmcnt(0) |
| 64 | +; CHECK-NEXT: s_setpc_b64 s[30:31] |
| 65 | +entry: |
| 66 | + %input = load <1 x half>, ptr addrspace(1) %src |
| 67 | + br label %bb |
| 68 | + |
| 69 | +bb: |
| 70 | + %phi = phi <1 x half> [ %input, %entry ] |
| 71 | + %res = shufflevector <1 x half> poison, <1 x half> %phi, <2 x i32> <i32 0, i32 1> |
| 72 | + store <2 x half> %res, ptr addrspace(1) %dst |
| 73 | + ret void |
| 74 | +} |
| 75 | + |
| 76 | +; Same as phi_vec1bf16_to_f32 but with bfloat instead of half. |
| 77 | +define void @phi_vec1bf16_to_f32(ptr addrspace(1) %src, ptr addrspace(1) %dst) #0 { |
| 78 | +; CHECK-LABEL: phi_vec1bf16_to_f32: |
| 79 | +; CHECK: ; %bb.0: ; %entry |
| 80 | +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 81 | +; CHECK-NEXT: s_mov_b32 s7, 0xf000 |
| 82 | +; CHECK-NEXT: s_mov_b32 s6, 0 |
| 83 | +; CHECK-NEXT: s_mov_b32 s4, s6 |
| 84 | +; CHECK-NEXT: s_mov_b32 s5, s6 |
| 85 | +; CHECK-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 |
| 86 | +; CHECK-NEXT: s_mov_b32 s4, 16 |
| 87 | +; CHECK-NEXT: s_waitcnt vmcnt(0) |
| 88 | +; CHECK-NEXT: v_lshlrev_b32_e64 v0, s4, v0 |
| 89 | +; CHECK-NEXT: ; %bb.1: ; %bb |
| 90 | +; CHECK-NEXT: v_mul_f32_e64 v0, 1.0, v0 |
| 91 | +; CHECK-NEXT: s_mov_b32 s4, 16 |
| 92 | +; CHECK-NEXT: v_lshrrev_b32_e64 v0, s4, v0 |
| 93 | +; CHECK-NEXT: s_mov_b32 s7, 0xf000 |
| 94 | +; CHECK-NEXT: s_mov_b32 s6, 0 |
| 95 | +; CHECK-NEXT: s_mov_b32 s4, s6 |
| 96 | +; CHECK-NEXT: s_mov_b32 s5, s6 |
| 97 | +; CHECK-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 offset:2 |
| 98 | +; CHECK-NEXT: s_waitcnt vmcnt(0) |
| 99 | +; CHECK-NEXT: s_setpc_b64 s[30:31] |
| 100 | +entry: |
| 101 | + %input = load <1 x bfloat>, ptr addrspace(1) %src |
| 102 | + br label %bb |
| 103 | + |
| 104 | +bb: |
| 105 | + %phi = phi <1 x bfloat> [ %input, %entry ] |
| 106 | + %res = shufflevector <1 x bfloat> poison, <1 x bfloat> %phi, <2 x i32> <i32 0, i32 1> |
| 107 | + store <2 x bfloat> %res, ptr addrspace(1) %dst |
| 108 | + ret void |
| 109 | +} |
| 110 | + |
| 111 | +; Same as phi_vec1half_to_f32_with_const_folding but with bfloat instead of half. |
| 112 | +define void @phi_vec1bf16_to_f32_with_const_folding(ptr addrspace(1) %dst) #0 { |
| 113 | +; CHECK-LABEL: phi_vec1bf16_to_f32_with_const_folding: |
| 114 | +; CHECK: ; %bb.0: ; %entry |
| 115 | +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 116 | +; CHECK-NEXT: s_mov_b32 s4, 0 |
| 117 | +; CHECK-NEXT: ; %bb.1: ; %bb |
| 118 | +; CHECK-NEXT: v_mul_f32_e64 v2, 1.0, s4 |
| 119 | +; CHECK-NEXT: s_mov_b32 s4, 16 |
| 120 | +; CHECK-NEXT: v_lshrrev_b32_e32 v2, s4, v2 |
| 121 | +; CHECK-NEXT: s_mov_b32 s7, 0xf000 |
| 122 | +; CHECK-NEXT: s_mov_b32 s6, 0 |
| 123 | +; CHECK-NEXT: s_mov_b32 s4, s6 |
| 124 | +; CHECK-NEXT: s_mov_b32 s5, s6 |
| 125 | +; CHECK-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 offset:2 |
| 126 | +; CHECK-NEXT: s_waitcnt vmcnt(0) |
| 127 | +; CHECK-NEXT: s_setpc_b64 s[30:31] |
| 128 | +entry: |
| 129 | + br label %bb |
| 130 | + |
| 131 | +bb: |
| 132 | + %phi = phi <1 x bfloat> [ zeroinitializer, %entry ] |
| 133 | + %res = shufflevector <1 x bfloat> poison, <1 x bfloat> %phi, <2 x i32> <i32 0, i32 1> |
| 134 | + store <2 x bfloat> %res, ptr addrspace(1) %dst |
| 135 | + ret void |
| 136 | +} |
| 137 | + |
| 138 | +attributes #0 = { noinline optnone } |
0 commit comments