@@ -17,7 +17,7 @@ llvm.func @init_mbarrier(%barrier_gen : !llvm.ptr, %barrier : !llvm.ptr<3>, %cou
17
17
// Test function: issues nvvm.mbarrier.arrive.expect_tx.shared twice on the same
// %barrier / %txcount operands, first without and then with `predicate = %pred`.
// Each op is preceded by the FileCheck pattern giving the llvm.inline_asm text
// expected for it.
llvm.func @init_mbarrier_arrive_expect_tx (%barrier : !llvm.ptr <3 >, %txcount : i32 , %pred : i1 ) {
18
18
//CHECK: llvm.inline_asm has_side_effects asm_dialect = att "mbarrier.arrive.expect_tx.shared.b64 _, [$0], $1;", "r,r"
19
19
nvvm.mbarrier.arrive.expect_tx.shared %barrier , %txcount : !llvm.ptr <3 >, i32
20
// Diff payload: the removed line spelled the directive with a space before the
// colon and carried a trailing space inside the constraint string ("r,r,b ");
// the added line drops both. Compared with the unpredicated pattern above, the
// predicated one prefixes the asm text with "@$2" and appends a "b" constraint.
- //CHECK : llvm.inline_asm has_side_effects asm_dialect = att "@$2 mbarrier.arrive.expect_tx.shared.b64 _, [$0], $1;", "r,r,b "
20
+ //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$2 mbarrier.arrive.expect_tx.shared.b64 _, [$0], $1;", "r,r,b"
21
21
nvvm.mbarrier.arrive.expect_tx.shared %barrier , %txcount , predicate = %pred : !llvm.ptr <3 >, i32 , i1
22
22
llvm.return
23
23
}
@@ -129,7 +129,7 @@ func.func @tma_load_5d_all(%tmaDescriptor: !llvm.ptr, %dest : !llvm.ptr<3>, %bar
129
129
// Test function for nvvm.cp.async.bulk.tensor.shared.cluster.global with one
// box coordinate (%crd0). The op is issued once without and once with
// `predicate =%p`; each is preceded by the FileCheck pattern giving the expected
// llvm.inline_asm text and constraint list.
func.func @tma_load_1d (%tmaDescriptor: !llvm.ptr , %dest : !llvm.ptr <3 >, %barrier: !llvm.ptr <3 >, %crd0: i32 , %p : i1 ) {
130
130
// CHECK: llvm.inline_asm has_side_effects asm_dialect = att "cp.async.bulk.tensor.1d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$2} ], [$3];", "r,l,r,r"
131
131
nvvm.cp.async.bulk.tensor.shared.cluster.global %dest , %tmaDescriptor , %barrier , box [%crd0 ] : !llvm.ptr <3 >, !llvm.ptr
132
// Diff payload: the removed line spelled the directive with a space before the
// colon and listed the constraints starting "l,r"; the added line uses the
// normal directive spelling and starts the list "r,l", the same order as the
// unpredicated pattern above, followed by a final "b" that pairs with the "@$4"
// guard. The constraint list is written without embedded whitespace so it has
// the same comma-separated shape as the unpredicated one.
- // CHECK : llvm.inline_asm has_side_effects asm_dialect = att "@$4 cp.async.bulk.tensor.1d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$2} ], [$3];", "l,r ,r,r,b"
132
+ // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$4 cp.async.bulk.tensor.1d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$2} ], [$3];", "r,l,r,r,b"
133
133
nvvm.cp.async.bulk.tensor.shared.cluster.global %dest , %tmaDescriptor , %barrier , box [%crd0 ] predicate =%p : !llvm.ptr <3 >, !llvm.ptr
134
134
return
135
135
}
@@ -138,7 +138,7 @@ func.func @tma_load_1d(%tmaDescriptor: !llvm.ptr, %dest : !llvm.ptr<3>, %barrier
138
138
// Test function for nvvm.cp.async.bulk.tensor.shared.cluster.global with two
// box coordinates (%crd0, %crd1). The op is issued once without and once with
// `predicate =%p`; each is preceded by the FileCheck pattern giving the expected
// llvm.inline_asm text and constraint list.
func.func @tma_load_2d (%tmaDescriptor: !llvm.ptr , %dest : !llvm.ptr <3 >, %barrier: !llvm.ptr <3 >, %crd0: i32 , %crd1: i32 , %p : i1 ) {
139
139
// CHECK: llvm.inline_asm has_side_effects asm_dialect = att "cp.async.bulk.tensor.2d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$2,$3} ], [$4];", "r,l,r,r,r"
140
140
nvvm.cp.async.bulk.tensor.shared.cluster.global %dest , %tmaDescriptor , %barrier , box [%crd0 ,%crd1 ] : !llvm.ptr <3 >, !llvm.ptr
141
// Diff payload: the removed line spelled the directive with a space before the
// colon and listed the constraints starting "l,r"; the added line uses the
// normal directive spelling and starts the list "r,l", the same order as the
// unpredicated pattern above, followed by a final "b" that pairs with the "@$5"
// guard. The constraint list is written without embedded whitespace so it has
// the same comma-separated shape as the unpredicated one.
- // CHECK : llvm.inline_asm has_side_effects asm_dialect = att "@$5 cp.async.bulk.tensor.2d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$2,$3} ], [$4];", "l,r ,r,r,r,b"
141
+ // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$5 cp.async.bulk.tensor.2d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$2,$3} ], [$4];", "r,l,r,r,r,b"
142
142
nvvm.cp.async.bulk.tensor.shared.cluster.global %dest , %tmaDescriptor , %barrier , box [%crd0 ,%crd1 ] predicate =%p : !llvm.ptr <3 >, !llvm.ptr
143
143
return
144
144
}
@@ -147,7 +147,7 @@ func.func @tma_load_2d(%tmaDescriptor: !llvm.ptr, %dest : !llvm.ptr<3>, %barrier
147
147
// Test function for nvvm.cp.async.bulk.tensor.shared.cluster.global with three
// box coordinates (%crd0..%crd2). The op is issued once without and once with
// `predicate =%p`; each is preceded by the FileCheck pattern giving the expected
// llvm.inline_asm text and constraint list.
func.func @tma_load_3d (%tmaDescriptor: !llvm.ptr , %dest : !llvm.ptr <3 >, %barrier: !llvm.ptr <3 >, %crd0: i32 , %crd1: i32 , %crd2: i32 , %p : i1 ) {
148
148
// CHECK: llvm.inline_asm has_side_effects asm_dialect = att "cp.async.bulk.tensor.3d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$2,$3,$4} ], [$5];", "r,l,r,r,r,r"
149
149
nvvm.cp.async.bulk.tensor.shared.cluster.global %dest , %tmaDescriptor , %barrier , box [%crd0 ,%crd1 ,%crd2 ] : !llvm.ptr <3 >, !llvm.ptr
150
// Diff payload: the removed line spelled the directive with a space before the
// colon, closed the coordinate list as "}]" and listed the constraints starting
// "l,r". The added line uses the normal directive spelling, writes the
// coordinate list as "} ]" like the unpredicated pattern above, and starts the
// constraints "r,l" in the same order as that pattern, followed by a final "b"
// that pairs with the "@$6" guard. The constraint list is written without
// embedded whitespace so it has the same comma-separated shape as the
// unpredicated one.
- // CHECK : llvm.inline_asm has_side_effects asm_dialect = att "@$6 cp.async.bulk.tensor.3d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$2,$3,$4}], [$5];", "l,r ,r,r,r,r,b"
150
+ // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$6 cp.async.bulk.tensor.3d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$2,$3,$4} ], [$5];", "r,l,r,r,r,r,b"
151
151
nvvm.cp.async.bulk.tensor.shared.cluster.global %dest , %tmaDescriptor , %barrier , box [%crd0 ,%crd1 ,%crd2 ] predicate =%p : !llvm.ptr <3 >, !llvm.ptr
152
152
return
153
153
}
@@ -156,7 +156,7 @@ func.func @tma_load_3d(%tmaDescriptor: !llvm.ptr, %dest : !llvm.ptr<3>, %barrier
156
156
// Test function for nvvm.cp.async.bulk.tensor.shared.cluster.global with four
// box coordinates (%crd0..%crd3). The op is issued once without and once with
// `predicate =%p`; each is preceded by the FileCheck pattern giving the expected
// llvm.inline_asm text and constraint list.
func.func @tma_load_4d (%tmaDescriptor: !llvm.ptr , %dest : !llvm.ptr <3 >, %barrier: !llvm.ptr <3 >, %crd0: i32 , %crd1: i32 , %crd2: i32 , %crd3: i32 , %p : i1 ) {
157
157
// CHECK: llvm.inline_asm has_side_effects asm_dialect = att "cp.async.bulk.tensor.4d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$2,$3,$4,$5} ], [$6];", "r,l,r,r,r,r,r"
158
158
nvvm.cp.async.bulk.tensor.shared.cluster.global %dest , %tmaDescriptor , %barrier , box [%crd0 ,%crd1 ,%crd2 ,%crd3 ] : !llvm.ptr <3 >, !llvm.ptr
159
// Diff payload: the removed line spelled the directive with a space before the
// colon, closed the coordinate list as "}]" and listed the constraints starting
// "l,r". The added line uses the normal directive spelling, writes the
// coordinate list as "} ]" like the unpredicated pattern above, and starts the
// constraints "r,l" in the same order as that pattern, followed by a final "b"
// that pairs with the "@$7" guard. The constraint list is written without
// embedded whitespace so it has the same comma-separated shape as the
// unpredicated one.
- // CHECK : llvm.inline_asm has_side_effects asm_dialect = att "@$7 cp.async.bulk.tensor.4d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$2,$3,$4,$5}], [$6];", "l,r ,r,r,r,r,r,b"
159
+ // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$7 cp.async.bulk.tensor.4d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$2,$3,$4,$5} ], [$6];", "r,l,r,r,r,r,r,b"
160
160
nvvm.cp.async.bulk.tensor.shared.cluster.global %dest , %tmaDescriptor , %barrier , box [%crd0 ,%crd1 ,%crd2 ,%crd3 ] predicate =%p : !llvm.ptr <3 >, !llvm.ptr
161
161
return
162
162
}
@@ -165,7 +165,7 @@ func.func @tma_load_4d(%tmaDescriptor: !llvm.ptr, %dest : !llvm.ptr<3>, %barrier
165
165
// Test function for nvvm.cp.async.bulk.tensor.shared.cluster.global with five
// box coordinates (%crd0..%crd4). The op is issued once without and once with
// `predicate =%p`; each is preceded by the FileCheck pattern giving the expected
// llvm.inline_asm text and constraint list.
func.func @tma_load_5d (%tmaDescriptor: !llvm.ptr , %dest : !llvm.ptr <3 >, %barrier: !llvm.ptr <3 >, %crd0: i32 , %crd1: i32 , %crd2: i32 , %crd3: i32 , %crd4: i32 , %p : i1 ) {
166
166
// CHECK: llvm.inline_asm has_side_effects asm_dialect = att "cp.async.bulk.tensor.5d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$2,$3,$4,$5,$6} ], [$7];", "r,l,r,r,r,r,r,r"
167
167
nvvm.cp.async.bulk.tensor.shared.cluster.global %dest , %tmaDescriptor , %barrier , box [%crd0 ,%crd1 ,%crd2 ,%crd3 ,%crd4 ] : !llvm.ptr <3 >, !llvm.ptr
168
// Diff payload: the removed line spelled the directive with a space before the
// colon, closed the coordinate list as "}]" and listed the constraints starting
// "l,r". The added line uses the normal directive spelling, writes the
// coordinate list as "} ]" like the unpredicated pattern above, and starts the
// constraints "r,l" in the same order as that pattern, followed by a final "b"
// that pairs with the "@$8" guard. The constraint list is written without
// embedded whitespace so it has the same comma-separated shape as the
// unpredicated one.
- // CHECK : llvm.inline_asm has_side_effects asm_dialect = att "@$8 cp.async.bulk.tensor.5d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$2,$3,$4,$5,$6}], [$7];", "l,r ,r,r,r,r,r,r,b"
168
+ // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$8 cp.async.bulk.tensor.5d.shared::cluster.global.mbarrier::complete_tx::bytes [$0], [$1, {$2,$3,$4,$5,$6} ], [$7];", "r,l,r,r,r,r,r,r,b"
169
169
nvvm.cp.async.bulk.tensor.shared.cluster.global %dest , %tmaDescriptor , %barrier , box [%crd0 ,%crd1 ,%crd2 ,%crd3 ,%crd4 ] predicate =%p : !llvm.ptr <3 >, !llvm.ptr
170
170
return
171
171
}
0 commit comments