|
| 1 | +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s |
| 2 | + |
| 3 | +// The aim of this test is to verfiy that information of |
| 4 | +// alignment of loaded objects is passed to outlined |
| 5 | +// functions. |
| 6 | + |
| 7 | +module attributes {llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} { |
| 8 | + omp.private {type = private} @_QFEk_private_i32 : i32 |
| 9 | + llvm.func @_QQmain() { |
| 10 | + %0 = llvm.mlir.constant(1 : i32) : i32 |
| 11 | + %7 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5> |
| 12 | + %8 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr |
| 13 | + %12 = llvm.mlir.constant(1 : i64) : i64 |
| 14 | + %13 = llvm.alloca %12 x i32 {bindc_name = "k"} : (i64) -> !llvm.ptr<5> |
| 15 | + %14 = llvm.addrspacecast %13 : !llvm.ptr<5> to !llvm.ptr |
| 16 | + %15 = llvm.mlir.constant(1 : i64) : i64 |
| 17 | + %16 = llvm.alloca %15 x i32 {bindc_name = "b"} : (i64) -> !llvm.ptr<5> |
| 18 | + %17 = llvm.addrspacecast %16 : !llvm.ptr<5> to !llvm.ptr |
| 19 | + %19 = llvm.mlir.constant(1 : index) : i64 |
| 20 | + %20 = llvm.mlir.constant(0 : index) : i64 |
| 21 | + %22 = llvm.mlir.addressof @_QFEa : !llvm.ptr |
| 22 | + %25 = llvm.mlir.addressof @_QFECnz : !llvm.ptr |
| 23 | + %60 = llvm.getelementptr %8[0, 7, %20, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> |
| 24 | + %61 = llvm.load %60 : !llvm.ptr -> i64 |
| 25 | + %62 = llvm.getelementptr %8[0, 7, %20, 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> |
| 26 | + %63 = llvm.load %62 : !llvm.ptr -> i64 |
| 27 | + %64 = llvm.getelementptr %8[0, 7, %20, 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> |
| 28 | + %65 = llvm.load %64 : !llvm.ptr -> i64 |
| 29 | + %66 = llvm.sub %63, %19 : i64 |
| 30 | + %67 = omp.map.bounds lower_bound(%20 : i64) upper_bound(%66 : i64) extent(%63 : i64) stride(%65 : i64) start_idx(%61 : i64) {stride_in_bytes = true} |
| 31 | + %68 = llvm.getelementptr %22[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> |
| 32 | + %69 = omp.map.info var_ptr(%22 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%68 : !llvm.ptr) bounds(%67) -> !llvm.ptr {name = ""} |
| 33 | + %70 = omp.map.info var_ptr(%22 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(to) capture(ByRef) members(%69 : [0] : !llvm.ptr) -> !llvm.ptr {name = "a"} |
| 34 | + %71 = omp.map.info var_ptr(%17 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "b"} |
| 35 | + %72 = omp.map.info var_ptr(%14 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "k"} |
| 36 | + %73 = omp.map.info var_ptr(%25 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "nz"} |
| 37 | + omp.target map_entries(%70 -> %arg0, %71 -> %arg1, %72 -> %arg2, %73 -> %arg3, %69 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { |
| 38 | + %106 = llvm.mlir.constant(0 : index) : i64 |
| 39 | + %107 = llvm.mlir.constant(13 : i32) : i32 |
| 40 | + %108 = llvm.mlir.constant(1000 : i32) : i32 |
| 41 | + %109 = llvm.mlir.constant(1 : i32) : i32 |
| 42 | + omp.teams { |
| 43 | + omp.parallel private(@_QFEk_private_i32 %arg2 -> %arg5 : !llvm.ptr) { |
| 44 | + %110 = llvm.mlir.constant(1 : i32) : i32 |
| 45 | + %111 = llvm.alloca %110 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5> |
| 46 | + %112 = llvm.addrspacecast %111 : !llvm.ptr<5> to !llvm.ptr |
| 47 | + omp.distribute { |
| 48 | + omp.wsloop { |
| 49 | + omp.loop_nest (%arg6) : i32 = (%109) to (%108) inclusive step (%109) { |
| 50 | + llvm.store %arg6, %arg5 : i32, !llvm.ptr |
| 51 | + %115 = llvm.mlir.constant(48 : i32) : i32 |
| 52 | + "llvm.intr.memcpy"(%112, %arg0, %115) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () |
| 53 | + omp.yield |
| 54 | + } |
| 55 | + } {omp.composite} |
| 56 | + } {omp.composite} |
| 57 | + omp.terminator |
| 58 | + } {omp.composite} |
| 59 | + omp.terminator |
| 60 | + } |
| 61 | + omp.terminator |
| 62 | + } |
| 63 | + llvm.return |
| 64 | + } |
| 65 | + llvm.mlir.global internal @_QFEa() {addr_space = 0 : i32} : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> { |
| 66 | + %6 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> |
| 67 | + llvm.return %6 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> |
| 68 | + } |
| 69 | + llvm.mlir.global internal constant @_QFECnz() {addr_space = 0 : i32} : i32 { |
| 70 | + %0 = llvm.mlir.constant(1000 : i32) : i32 |
| 71 | + llvm.return %0 : i32 |
| 72 | + } |
| 73 | +} |
| 74 | + |
| 75 | +// CHECK: call void @__kmpc_distribute_for_static_loop_4u( |
| 76 | +// CHECK-SAME: ptr addrspacecast (ptr addrspace(1) @[[GLOB:[0-9]+]] to ptr), |
| 77 | +// CHECK-SAME: ptr @[[LOOP_BODY_FUNC:.*]], ptr %[[LOOP_BODY_FUNC_ARG:.*]], |
| 78 | +// CHEKC-SAME i32 1000, i32 %1, i32 0, i32 0) |
| 79 | + |
| 80 | + |
| 81 | +// CHECK: define internal void @[[LOOP_BODY_FUNC]](i32 %[[CNT:.*]], ptr %[[LOOP_BODY_ARG_PTR:.*]]) #[[ATTRS:[0-9]+]] { |
| 82 | +// CHECK: %[[GEP_PTR_0:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[LOOP_BODY_ARG_PTR]], i32 0, i32 0 |
| 83 | +// CHECK: %[[INT_PTR:.*]] = load ptr, ptr %[[GEP_PTR_0]], align 8, !align ![[ALIGN_INT:[0-9]+]] |
| 84 | +// CHECK: %[[GEP_PTR_1:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[LOOP_BODY_ARG_PTR]], i32 0, i32 1 |
| 85 | +// CHECK: %[[STRUCT_PTR_0:.*]] = load ptr, ptr %[[GEP_PTR_1]], align 8, !align ![[ALIGN_STRUCT:[0-9]+]] |
| 86 | +// CHECK: %[[GEP_PTR_2:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[LOOP_BODY_ARG_PTR]], i32 0, i32 2 |
| 87 | +// CHECK: %[[STRUCT_PTR_1:.*]] = load ptr, ptr %[[GEP_PTR_2]], align 8, !align ![[ALIGN_STRUCT:[0-9]+]] |
| 88 | +// CHECK: store i32 %[[DATA_INT:.*]], ptr %[[INT_PTR]], align 4 |
| 89 | +// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[STRUCT_PTR_0]], ptr %[[STRUCT_PTR_1]], i32 48, i1 false) |
| 90 | + |
| 91 | +// CHECK: ![[ALIGN_STRUCT]] = !{i64 8} |
| 92 | +// CHECK: ![[ALIGN_INT]] = !{i64 4} |
0 commit comments