- // RUN: split-file %s %t
- // RUN: mlir-translate -mlir-to-llvmir %t/host.mlir | FileCheck %s --check-prefix=HOST
- // RUN: mlir-translate -mlir-to-llvmir %t/device.mlir | FileCheck %s --check-prefix=DEVICE
-
- //--- host.mlir
+ // RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s

module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
-   llvm.func @main(%arg0 : !llvm.ptr) {
+   llvm.func @host(%arg0 : !llvm.ptr) {
    %x = llvm.load %arg0 : !llvm.ptr -> i32
    %0 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr
    omp.target host_eval(%x -> %lb, %x -> %ub, %x -> %step : i32, i32, i32) map_entries(%0 -> %ptr : !llvm.ptr) {
@@ -32,36 +28,36 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
  }
}

- // HOST-LABEL: define void @main
- // HOST: %omp_loop.tripcount = {{.*}}
- // HOST-NEXT: br label %[[ENTRY:.*]]
- // HOST: [[ENTRY]]:
- // HOST: %[[TRIPCOUNT:.*]] = zext i32 %omp_loop.tripcount to i64
- // HOST: %[[TRIPCOUNT_KARG:.*]] = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %[[KARGS:.*]], i32 0, i32 8
- // HOST-NEXT: store i64 %[[TRIPCOUNT]], ptr %[[TRIPCOUNT_KARG]]
- // HOST: %[[RESULT:.*]] = call i32 @__tgt_target_kernel({{.*}}, ptr %[[KARGS]])
- // HOST-NEXT: %[[CMP:.*]] = icmp ne i32 %[[RESULT]], 0
- // HOST-NEXT: br i1 %[[CMP]], label %[[OFFLOAD_FAILED:.*]], label %{{.*}}
- // HOST: [[OFFLOAD_FAILED]]:
- // HOST: call void @[[TARGET_OUTLINE:.*]]({{.*}})
+ // CHECK-LABEL: define void @host
+ // CHECK: %omp_loop.tripcount = {{.*}}
+ // CHECK-NEXT: br label %[[ENTRY:.*]]
+ // CHECK: [[ENTRY]]:
+ // CHECK: %[[TRIPCOUNT:.*]] = zext i32 %omp_loop.tripcount to i64
+ // CHECK: %[[TRIPCOUNT_KARG:.*]] = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %[[KARGS:.*]], i32 0, i32 8
+ // CHECK-NEXT: store i64 %[[TRIPCOUNT]], ptr %[[TRIPCOUNT_KARG]]
+ // CHECK: %[[RESULT:.*]] = call i32 @__tgt_target_kernel({{.*}}, ptr %[[KARGS]])
+ // CHECK-NEXT: %[[CMP:.*]] = icmp ne i32 %[[RESULT]], 0
+ // CHECK-NEXT: br i1 %[[CMP]], label %[[OFFLOAD_FAILED:.*]], label %{{.*}}
+ // CHECK: [[OFFLOAD_FAILED]]:
+ // CHECK: call void @[[TARGET_OUTLINE:.*]]({{.*}})

- // HOST: define internal void @[[TARGET_OUTLINE]]
- // HOST: call void{{.*}}@__kmpc_fork_teams({{.*}}, ptr @[[TEAMS_OUTLINE:.*]], {{.*}})
+ // CHECK: define internal void @[[TARGET_OUTLINE]]
+ // CHECK: call void{{.*}}@__kmpc_fork_teams({{.*}}, ptr @[[TEAMS_OUTLINE:.*]], {{.*}})

- // HOST: define internal void @[[TEAMS_OUTLINE]]
- // HOST: call void @[[DISTRIBUTE_OUTLINE:.*]]({{.*}})
+ // CHECK: define internal void @[[TEAMS_OUTLINE]]
+ // CHECK: call void @[[DISTRIBUTE_OUTLINE:.*]]({{.*}})

- // HOST: define internal void @[[DISTRIBUTE_OUTLINE]]
- // HOST: call void @__kmpc_for_static_init{{.*}}(ptr {{.*}}, i32 {{.*}}, i32 92, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, i32 {{.*}}, i32 {{.*}})
- // HOST: call void (ptr, i32, ptr, ...) @__kmpc_fork_call({{.*}}, ptr @[[PARALLEL_OUTLINE:.*]], {{.*}})
+ // CHECK: define internal void @[[DISTRIBUTE_OUTLINE]]
+ // CHECK: call void @__kmpc_for_static_init{{.*}}(ptr {{.*}}, i32 {{.*}}, i32 92, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, i32 {{.*}}, i32 {{.*}})
+ // CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call({{.*}}, ptr @[[PARALLEL_OUTLINE:.*]], {{.*}})

- // HOST: define internal void @[[PARALLEL_OUTLINE]]
- // HOST: call void @__kmpc_for_static_init{{.*}}(ptr {{.*}}, i32 {{.*}}, i32 34, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, i32 {{.*}}, i32 {{.*}})
+ // CHECK: define internal void @[[PARALLEL_OUTLINE]]
+ // CHECK: call void @__kmpc_for_static_init{{.*}}(ptr {{.*}}, i32 {{.*}}, i32 34, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, i32 {{.*}}, i32 {{.*}})

- //--- device.mlir
+ // -----

module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
-   llvm.func @main(%arg0 : !llvm.ptr) {
+   llvm.func @device(%arg0 : !llvm.ptr) {
    %0 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr
    omp.target map_entries(%0 -> %ptr : !llvm.ptr) {
      %x = llvm.load %ptr : !llvm.ptr -> i32
@@ -87,25 +83,80 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
  }
}

- // DEVICE: @[[KERNEL_NAME:.*]]_exec_mode = weak protected constant i8 [[EXEC_MODE:3]]
- // DEVICE: @llvm.compiler.used = appending global [1 x ptr] [ptr @[[KERNEL_NAME]]_exec_mode], section "llvm.metadata"
- // DEVICE: @[[KERNEL_NAME]]_kernel_environment = weak_odr protected constant %struct.KernelEnvironmentTy {
- // DEVICE-SAME: %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 [[EXEC_MODE]], {{.*}}},
- // DEVICE-SAME: ptr @{{.*}}, ptr @{{.*}} }
+ // CHECK: @[[KERNEL_NAME:.*]]_exec_mode = weak protected constant i8 [[EXEC_MODE:3]]
+ // CHECK: @llvm.compiler.used = appending global [1 x ptr] [ptr @[[KERNEL_NAME]]_exec_mode], section "llvm.metadata"
+ // CHECK: @[[KERNEL_NAME]]_kernel_environment = weak_odr protected constant %struct.KernelEnvironmentTy {
+ // CHECK-SAME: %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 [[EXEC_MODE]], {{.*}}},
+ // CHECK-SAME: ptr @{{.*}}, ptr @{{.*}} }
+
+ // CHECK: define weak_odr protected amdgpu_kernel void @[[KERNEL_NAME]]({{.*}})
+ // CHECK: %{{.*}} = call i32 @__kmpc_target_init(ptr @[[KERNEL_NAME]]_kernel_environment, {{.*}})
+ // CHECK: call void @[[TARGET_OUTLINE:.*]]({{.*}})
+ // CHECK: call void @__kmpc_target_deinit()
+
+ // CHECK: define internal void @[[TARGET_OUTLINE]]({{.*}})
+ // CHECK: call void @[[TEAMS_OUTLINE:.*]]({{.*}})
+
+ // CHECK: define internal void @[[TEAMS_OUTLINE]]({{.*}})
+ // CHECK: call void @__kmpc_distribute_static_loop{{.*}}({{.*}}, ptr @[[DISTRIBUTE_OUTLINE:[^,]*]], {{.*}})
+
+ // CHECK: define internal void @[[DISTRIBUTE_OUTLINE]]({{.*}})
+ // CHECK: call void @__kmpc_parallel_51(ptr {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, ptr @[[PARALLEL_OUTLINE:.*]], ptr {{.*}}, ptr {{.*}}, i64 {{.*}})
+
+ // CHECK: define internal void @[[PARALLEL_OUTLINE]]({{.*}})
+ // CHECK: call void @__kmpc_for_static_loop{{.*}}({{.*}})
+
+ // -----
+
+ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
+   llvm.func @device2(%arg0 : !llvm.ptr) {
+     %0 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr
+     omp.target map_entries(%0 -> %ptr : !llvm.ptr) {
+       %x = llvm.load %ptr : !llvm.ptr -> i32
+       omp.teams {
+         omp.distribute {
+           omp.loop_nest (%iv1) : i32 = (%x) to (%x) step (%x) {
+             omp.parallel {
+               omp.terminator
+             }
+             llvm.br ^bb2
+           ^bb1:
+             omp.parallel {
+               omp.terminator
+             }
+             omp.yield
+           ^bb2:
+             llvm.br ^bb1
+           }
+         }
+         omp.terminator
+       }
+       omp.terminator
+     }
+     llvm.return
+   }
+ }
+
+ // CHECK: @[[KERNEL_NAME:.*]]_exec_mode = weak protected constant i8 [[EXEC_MODE:3]]
+ // CHECK: @llvm.compiler.used = appending global [1 x ptr] [ptr @[[KERNEL_NAME]]_exec_mode], section "llvm.metadata"
+ // CHECK: @[[KERNEL_NAME]]_kernel_environment = weak_odr protected constant %struct.KernelEnvironmentTy {
+ // CHECK-SAME: %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 [[EXEC_MODE]], {{.*}}},
+ // CHECK-SAME: ptr @{{.*}}, ptr @{{.*}} }

- // DEVICE: define weak_odr protected amdgpu_kernel void @[[KERNEL_NAME]]({{.*}})
- // DEVICE: %{{.*}} = call i32 @__kmpc_target_init(ptr @[[KERNEL_NAME]]_kernel_environment, {{.*}})
- // DEVICE: call void @[[TARGET_OUTLINE:.*]]({{.*}})
- // DEVICE: call void @__kmpc_target_deinit()
+ // CHECK: define weak_odr protected amdgpu_kernel void @[[KERNEL_NAME]]({{.*}})
+ // CHECK: %{{.*}} = call i32 @__kmpc_target_init(ptr @[[KERNEL_NAME]]_kernel_environment, {{.*}})
+ // CHECK: call void @[[TARGET_OUTLINE:.*]]({{.*}})
+ // CHECK: call void @__kmpc_target_deinit()

- // DEVICE: define internal void @[[TARGET_OUTLINE]]({{.*}})
- // DEVICE: call void @[[TEAMS_OUTLINE:.*]]({{.*}})
+ // CHECK: define internal void @[[TARGET_OUTLINE]]({{.*}})
+ // CHECK: call void @[[TEAMS_OUTLINE:.*]]({{.*}})

- // DEVICE: define internal void @[[TEAMS_OUTLINE]]({{.*}})
- // DEVICE: call void @__kmpc_distribute_static_loop{{.*}}({{.*}}, ptr @[[DISTRIBUTE_OUTLINE:[^,]*]], {{.*}})
+ // CHECK: define internal void @[[TEAMS_OUTLINE]]({{.*}})
+ // CHECK: call void @__kmpc_distribute_static_loop{{.*}}({{.*}}, ptr @[[DISTRIBUTE_OUTLINE:[^,]*]], {{.*}})

- // DEVICE: define internal void @[[DISTRIBUTE_OUTLINE]]({{.*}})
- // DEVICE: call void @__kmpc_parallel_51(ptr {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, ptr @[[PARALLEL_OUTLINE:.*]], ptr {{.*}}, ptr {{.*}}, i64 {{.*}})
+ // CHECK: define internal void @[[DISTRIBUTE_OUTLINE]]({{.*}})
+ // CHECK: call void @__kmpc_parallel_51(ptr {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, ptr @[[PARALLEL_OUTLINE0:.*]], ptr {{.*}}, ptr {{.*}}, i64 {{.*}})
+ // CHECK: call void @__kmpc_parallel_51(ptr {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, ptr @[[PARALLEL_OUTLINE1:.*]], ptr {{.*}}, ptr {{.*}}, i64 {{.*}})

- // DEVICE: define internal void @[[PARALLEL_OUTLINE]]({{.*}})
- // DEVICE: call void @__kmpc_for_static_loop{{.*}}({{.*}})
+ // CHECK: define internal void @[[PARALLEL_OUTLINE1]]({{.*}})
+ // CHECK: define internal void @[[PARALLEL_OUTLINE0]]({{.*}})