// DEFINE: %{entry_point} = entry
// DEFINE: %{compile} = mlir-opt %s \
// DEFINE:   -enable-arm-streaming="mode=locally enable-za" \
// DEFINE:   -convert-vector-to-arm-sme -convert-arm-sme-to-scf \
// DEFINE:   -convert-vector-to-llvm="enable-arm-sme" -cse -canonicalize \
// DEFINE:   -allocate-arm-sme-tiles -test-lower-to-llvm
// DEFINE: %{run} = %mcr_aarch64_cmd \
// DEFINE:   -march=aarch64 -mattr=+sve,+sme \
// DEFINE:   -e %{entry_point} -entry-point-result=void \
// DEFINE:   -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils

// RUN: %{compile} | %{run} | FileCheck %s
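
// This test exercises lowering of 2-D scalable vector.transfer_write ops to
// ArmSME tile stores: plain and masked stores, with and without a transposing
// permutation map. After each store the memory is reloaded and printed.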

llvm.func @printCString(!llvm.ptr<i8>)

// TODO: replace with vector.print <str> once #68695 lands.
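// Note: the enable_arm_streaming_ignore attribute below keeps this helper out
// of streaming mode (the enable-arm-streaming pass skips it).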
func.func @print_str(%str: !llvm.ptr<array<17 x i8>>) attributes { enable_arm_streaming_ignore } {
  %c0 = llvm.mlir.constant(0 : index) : i64
  %str_bytes = llvm.getelementptr %str[%c0, %c0]
    : (!llvm.ptr<array<17 x i8>>, i64, i64) -> !llvm.ptr<i8>
  llvm.call @printCString(%str_bytes) : (!llvm.ptr<i8>) -> ()
  return
}

// Vector store.
func.func @transfer_write_2d(%A : memref<?x?xf32>, %base1: index, %base2: index) {
  %c0 = arith.constant 0.0 : f32
  %zero = vector.splat %c0 : vector<[4]x[4]xf32>
  vector.transfer_write %zero, %A[%base1, %base2] {in_bounds=[true, true]} :
    vector<[4]x[4]xf32>, memref<?x?xf32>
  return
}

// Masked vector store.
func.func @transfer_write_2d_mask(%A : memref<?x?xf32>, %base1: index, %base2: index) {
  %c0 = arith.constant 0.0 : f32
  %c2 = arith.constant 2 : index
  %c3 = arith.constant 3 : index
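  // The mask selects the first 2 rows and first 3 columns of the tile.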
  %mask = vector.create_mask %c2, %c3 : vector<[4]x[4]xi1>
  %zero = vector.splat %c0 : vector<[4]x[4]xf32>
  vector.transfer_write %zero, %A[%base1, %base2], %mask {in_bounds=[true, true]} :
    vector<[4]x[4]xf32>, memref<?x?xf32>
  return
}

// Vector store + transpose.
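// Loads a [4]x[4] scalable tile and writes it back transposed via a
// (d0, d1) -> (d1, d0) permutation map.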
func.func @transfer_write_2d_transposed(%A : memref<?x?xf32>, %base1: index, %base2: index) {
  %0 = vector.load %A[%base1, %base2] : memref<?x?xf32>, vector<[4]x[4]xf32>
  vector.transfer_write %0, %A[%base1, %base2] {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds=[true, true]} :
    vector<[4]x[4]xf32>, memref<?x?xf32>
  return
}

// Masked vector store + transpose.
func.func @transfer_write_2d_mask_transposed(%A : memref<?x?xf32>, %base1: index, %base2: index) {
  %c2 = arith.constant 2 : index
  %c4 = arith.constant 4 : index
  %mask = vector.create_mask %c4, %c2 : vector<[4]x[4]xi1>
  %0 = vector.load %A[%base1, %base2] : memref<?x?xf32>, vector<[4]x[4]xf32>
  vector.transfer_write %0, %A[%base1, %base2], %mask {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds=[true, true]} :
    vector<[4]x[4]xf32>, memref<?x?xf32>
  return
}

// Vector load + print.
func.func @load_and_print(%A : memref<?x?xf32>, %base1: index, %base2: index) {
  %tile_begin_str = llvm.mlir.addressof @tile_begin : !llvm.ptr<array<17 x i8>>

  %0 = vector.load %A[%base1, %base2] : memref<?x?xf32>, vector<[4]x[4]xf32>

  func.call @print_str(%tile_begin_str) : (!llvm.ptr<array<17 x i8>>) -> ()
  vector.print %0 : vector<[4]x[4]xf32>

  return
}

// Allocate heap memory of size 'd0' x 'd1' and initialize it.
//
// Example:
//
//   initialize_memory(%c4, %c5)
//
//    0,  1,  2,  3,  4
//   10, 11, 12, 13, 14
//   20, 21, 22, 23, 24
//   30, 31, 32, 33, 34
//
// Returns a dynamic memref. It is the caller's responsibility to free the
// returned memref.
func.func @initialize_memory(%d0 : index, %d1 : index) -> memref<?x?xf32> {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c1_f32 = arith.constant 1.0 : f32
  %c10_f32 = arith.constant 10.0 : f32

  %A = memref.alloc(%d0, %d1) : memref<?x?xf32>

  %init = arith.constant 0.0 : f32
  scf.for %i = %c0 to %d0 step %c1 iter_args(%val = %init) -> f32 {
    scf.for %j = %c0 to %d1 step %c1 iter_args(%inner_val = %val) -> f32 {
      memref.store %inner_val, %A[%i, %j] : memref<?x?xf32>
      %inner_val_next = arith.addf %inner_val, %c1_f32 : f32
      scf.yield %inner_val_next : f32
    }
    %val_next = arith.addf %val, %c10_f32 : f32
    scf.yield %val_next : f32
  }

  return %A : memref<?x?xf32>
}

func.func @entry() {
  %c0 = arith.constant 0 : index
  %c2 = arith.constant 2 : index
  %c4 = arith.constant 4 : index

  // Allocate enough memory to load a 32-bit tile, plus a little extra to test
  // non-zero offsets while remaining in bounds.
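  // The [4]x[4] scalable tile is svl_s x svl_s elements, where svl_s = vscale * 4.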
  %vscale = vector.vscale
  %svl_s = arith.muli %c4, %vscale : index
  %svl_s_plus_two = arith.addi %svl_s, %c2 : index

  // 1. Initialize memory.
  // CHECK-LABEL: TILE BEGIN:
  // CHECK-NEXT: ( 0, 1, 2, 3
  // CHECK-NEXT: ( 10, 11, 12, 13
  // CHECK-NEXT: ( 20, 21, 22, 23
  // CHECK-NEXT: ( 30, 31, 32, 33
  %A = call @initialize_memory(%svl_s_plus_two, %svl_s_plus_two) : (index, index) -> memref<?x?xf32>
  call @load_and_print(%A, %c0, %c0) : (memref<?x?xf32>, index, index) -> ()

  // 2. Write a 2-D vector of zeroes to the memory from 1. at offset [2, 2].
  // CHECK-LABEL: TILE BEGIN:
  // CHECK-NEXT: ( 0, 1, 2, 3
  // CHECK-NEXT: ( 10, 11, 12, 13
  // CHECK-NEXT: ( 20, 21, 0, 0
  // CHECK-NEXT: ( 30, 31, 0, 0
  call @transfer_write_2d(%A, %c2, %c2) : (memref<?x?xf32>, index, index) -> ()
  call @load_and_print(%A, %c0, %c0) : (memref<?x?xf32>, index, index) -> ()

  // 3. Write a 2-D vector of zeroes to the result of 2., this time with a
  //    mask (nrows=2, ncols=3).
  // CHECK-LABEL: TILE BEGIN:
  // CHECK-NEXT: ( 0, 0, 0, 3
  // CHECK-NEXT: ( 0, 0, 0, 13
  // CHECK-NEXT: ( 20, 21, 0, 0
  // CHECK-NEXT: ( 30, 31, 0, 0
  call @transfer_write_2d_mask(%A, %c0, %c0) : (memref<?x?xf32>, index, index) -> ()
  call @load_and_print(%A, %c0, %c0) : (memref<?x?xf32>, index, index) -> ()

  // 4. Reload the result of 3., then store it back transposed.
  // CHECK-LABEL: TILE BEGIN:
  // CHECK-NEXT: ( 0, 0, 20, 30
  // CHECK-NEXT: ( 0, 0, 21, 31
  // CHECK-NEXT: ( 0, 0, 0, 0
  // CHECK-NEXT: ( 3, 13, 0, 0
  call @transfer_write_2d_transposed(%A, %c0, %c0) : (memref<?x?xf32>, index, index) -> ()
  call @load_and_print(%A, %c0, %c0) : (memref<?x?xf32>, index, index) -> ()

  // 5. Reload the result of 4., then store it back transposed, this time with
  //    a mask (nrows=4, ncols=2). The mask is applied after the permutation.
  // CHECK-LABEL: TILE BEGIN:
  // CHECK-NEXT: ( 0, 0, 20, 30
  // CHECK-NEXT: ( 0, 0, 21, 31
  // CHECK-NEXT: ( 20, 21, 0, 0
  // CHECK-NEXT: ( 30, 31, 0, 0
  call @transfer_write_2d_mask_transposed(%A, %c0, %c0) : (memref<?x?xf32>, index, index) -> ()
  call @load_and_print(%A, %c0, %c0) : (memref<?x?xf32>, index, index) -> ()

  memref.dealloc %A : memref<?x?xf32>

  return
}

llvm.mlir.global internal constant @tile_begin("TILE BEGIN: \0A\00")