|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
1 | 2 | ; RUN: llc -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
|
2 | 3 |
|
3 | 4 | ;
|
|
9 | 10 |
|
10 | 11 |
|
11 | 12 | ; 128-bit loads instead of many 8-bit
|
12 |
| -; EG-LABEL: {{^}}combine_vloads: |
13 |
| -; EG: VTX_READ_128 |
14 |
| -; EG: VTX_READ_128 |
15 | 13 | define amdgpu_kernel void @combine_vloads(ptr addrspace(1) nocapture %src, ptr addrspace(1) nocapture %result) nounwind {
|
| 14 | +; EG-LABEL: combine_vloads: |
| 15 | +; EG: ; %bb.0: ; %entry |
| 16 | +; EG-NEXT: ALU 3, @16, KC0[CB0:0-32], KC1[] |
| 17 | +; EG-NEXT: LOOP_START_DX10 @10 |
| 18 | +; EG-NEXT: TEX 1 @12 |
| 19 | +; EG-NEXT: ALU 86, @20, KC0[], KC1[] |
| 20 | +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XY, T15.X, 0 |
| 21 | +; EG-NEXT: ALU_PUSH_BEFORE 4, @107, KC0[], KC1[] |
| 22 | +; EG-NEXT: JUMP @9 POP:1 |
| 23 | +; EG-NEXT: LOOP_BREAK @9 |
| 24 | +; EG-NEXT: POP @9 POP:1 |
| 25 | +; EG-NEXT: END_LOOP @2 |
| 26 | +; EG-NEXT: CF_END |
| 27 | +; EG-NEXT: PAD |
| 28 | +; EG-NEXT: Fetch clause starting at 12: |
| 29 | +; EG-NEXT: VTX_READ_128 T14.XYZW, T13.X, 0, #1 |
| 30 | +; EG-NEXT: VTX_READ_128 T15.XYZW, T13.X, 16, #1 |
| 31 | +; EG-NEXT: ALU clause starting at 16: |
| 32 | +; EG-NEXT: MOV T13.X, KC0[2].Y, |
| 33 | +; EG-NEXT: MOV T0.W, KC0[2].Z, |
| 34 | +; EG-NEXT: MOV * T1.W, literal.x, |
| 35 | +; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00) |
| 36 | +; EG-NEXT: ALU clause starting at 20: |
| 37 | +; EG-NEXT: LSHR T2.W, T14.Y, literal.x, |
| 38 | +; EG-NEXT: LSHR * T3.W, T14.W, literal.x, |
| 39 | +; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) |
| 40 | +; EG-NEXT: ADD_INT T2.W, PV.W, PS, |
| 41 | +; EG-NEXT: LSHR * T3.W, T15.Y, literal.x, |
| 42 | +; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) |
| 43 | +; EG-NEXT: LSHR T0.Y, T14.Y, literal.x, |
| 44 | +; EG-NEXT: LSHR T0.Z, T14.W, literal.x, |
| 45 | +; EG-NEXT: ADD_INT T2.W, PV.W, PS, |
| 46 | +; EG-NEXT: LSHR * T3.W, T15.W, literal.y, |
| 47 | +; EG-NEXT: 8(1.121039e-44), 24(3.363116e-44) |
| 48 | +; EG-NEXT: ADD_INT T16.X, PV.W, PS, |
| 49 | +; EG-NEXT: ADD_INT T0.Y, PV.Y, PV.Z, |
| 50 | +; EG-NEXT: LSHR T0.Z, T15.Y, literal.x, |
| 51 | +; EG-NEXT: LSHR T2.W, T14.X, literal.y, |
| 52 | +; EG-NEXT: LSHR * T3.W, T14.Z, literal.y, |
| 53 | +; EG-NEXT: 8(1.121039e-44), 24(3.363116e-44) |
| 54 | +; EG-NEXT: ADD_INT T17.X, PV.W, PS, |
| 55 | +; EG-NEXT: ADD_INT T0.Y, PV.Y, PV.Z, |
| 56 | +; EG-NEXT: LSHR T0.Z, T15.W, literal.x, |
| 57 | +; EG-NEXT: LSHR T2.W, T14.Y, literal.y, |
| 58 | +; EG-NEXT: LSHR * T3.W, T14.W, literal.y, |
| 59 | +; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44) |
| 60 | +; EG-NEXT: LSHR T18.X, T15.X, literal.x, |
| 61 | +; EG-NEXT: LSHR T1.Y, T14.X, literal.y, BS:VEC_120/SCL_212 |
| 62 | +; EG-NEXT: ADD_INT T1.Z, PV.W, PS, |
| 63 | +; EG-NEXT: LSHR T2.W, T15.Y, literal.z, |
| 64 | +; EG-NEXT: ADD_INT * T3.W, PV.Y, PV.Z, |
| 65 | +; EG-NEXT: 24(3.363116e-44), 8(1.121039e-44) |
| 66 | +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) |
| 67 | +; EG-NEXT: LSHR T19.X, T14.Z, literal.x, |
| 68 | +; EG-NEXT: ADD_INT T0.Y, T14.Y, T14.W, |
| 69 | +; EG-NEXT: AND_INT T0.Z, PS, literal.y, |
| 70 | +; EG-NEXT: ADD_INT T2.W, PV.Z, PV.W, |
| 71 | +; EG-NEXT: LSHR * T3.W, T15.W, literal.z, |
| 72 | +; EG-NEXT: 8(1.121039e-44), 255(3.573311e-43) |
| 73 | +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) |
| 74 | +; EG-NEXT: ADD_INT T20.X, PV.W, PS, |
| 75 | +; EG-NEXT: LSHL T2.Y, PV.Z, literal.x, |
| 76 | +; EG-NEXT: ADD_INT T0.Z, PV.Y, T15.Y, |
| 77 | +; EG-NEXT: ADD_INT T2.W, T1.Y, PV.X, |
| 78 | +; EG-NEXT: LSHR * T3.W, T15.X, literal.x, |
| 79 | +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) |
| 80 | +; EG-NEXT: ADD_INT T19.X, T14.X, T14.Z, |
| 81 | +; EG-NEXT: ADD_INT T0.Y, PV.W, PS, |
| 82 | +; EG-NEXT: LSHR T1.Z, T15.Z, literal.x, |
| 83 | +; EG-NEXT: LSHR T2.W, T14.X, literal.y, |
| 84 | +; EG-NEXT: LSHR * T3.W, T14.Z, literal.y, |
| 85 | +; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44) |
| 86 | +; EG-NEXT: ADD_INT T14.X, PV.W, PS, |
| 87 | +; EG-NEXT: LSHR T1.Y, T15.X, literal.x, |
| 88 | +; EG-NEXT: ADD_INT T1.Z, PV.Y, PV.Z, |
| 89 | +; EG-NEXT: ADD_INT T2.W, PV.X, T15.X, |
| 90 | +; EG-NEXT: ADD_INT * T3.W, T0.Z, T15.W, |
| 91 | +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) |
| 92 | +; EG-NEXT: AND_INT T15.X, PS, literal.x, |
| 93 | +; EG-NEXT: ADD_INT T0.Y, PV.W, T15.Z, |
| 94 | +; EG-NEXT: AND_INT T0.Z, PV.Z, literal.x, |
| 95 | +; EG-NEXT: ADD_INT T2.W, PV.X, PV.Y, |
| 96 | +; EG-NEXT: LSHR * T3.W, T15.Z, literal.y, |
| 97 | +; EG-NEXT: 255(3.573311e-43), 16(2.242078e-44) |
| 98 | +; EG-NEXT: ADD_INT T14.X, PV.W, PS, |
| 99 | +; EG-NEXT: LSHL T1.Y, PV.Z, literal.x, |
| 100 | +; EG-NEXT: AND_INT T0.Z, PV.Y, literal.y, |
| 101 | +; EG-NEXT: OR_INT T2.W, PV.X, T2.Y, |
| 102 | +; EG-NEXT: LSHL * T3.W, T20.X, literal.z, |
| 103 | +; EG-NEXT: 8(1.121039e-44), 255(3.573311e-43) |
| 104 | +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) |
| 105 | +; EG-NEXT: OR_INT T15.X, PV.W, PS, |
| 106 | +; EG-NEXT: OR_INT T0.Y, PV.Z, PV.Y, |
| 107 | +; EG-NEXT: LSHL T0.Z, PV.X, literal.x, |
| 108 | +; EG-NEXT: ADD_INT T2.W, T17.X, T18.X, |
| 109 | +; EG-NEXT: LSHR * T3.W, T15.Z, literal.y, |
| 110 | +; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) |
| 111 | +; EG-NEXT: ADD_INT T1.Y, PV.W, PS, |
| 112 | +; EG-NEXT: OR_INT T0.Z, PV.Y, PV.Z, |
| 113 | +; EG-NEXT: AND_INT T2.W, PV.X, literal.x, |
| 114 | +; EG-NEXT: LSHL * T3.W, T16.X, literal.y, |
| 115 | +; EG-NEXT: 16777215(2.350989e-38), 24(3.363116e-44) |
| 116 | +; EG-NEXT: OR_INT T14.Y, PV.W, PS, |
| 117 | +; EG-NEXT: AND_INT T2.W, PV.Z, literal.x, |
| 118 | +; EG-NEXT: LSHL * T3.W, PV.Y, literal.y, |
| 119 | +; EG-NEXT: 16777215(2.350989e-38), 24(3.363116e-44) |
| 120 | +; EG-NEXT: OR_INT T14.X, PV.W, PS, |
| 121 | +; EG-NEXT: ADD_INT * T2.W, T0.W, T1.W, |
| 122 | +; EG-NEXT: LSHR * T15.X, PV.W, literal.x, |
| 123 | +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| 124 | +; EG-NEXT: ALU clause starting at 107: |
| 125 | +; EG-NEXT: ADD_INT * T1.W, T1.W, literal.x, |
| 126 | +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) |
| 127 | +; EG-NEXT: SETE_INT * T2.W, PV.W, literal.x, |
| 128 | +; EG-NEXT: 8192(1.147944e-41), 0(0.000000e+00) |
| 129 | +; EG-NEXT: PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0, |
16 | 130 | entry:
|
17 | 131 | br label %for.body
|
18 | 132 |
|
|
0 commit comments