-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[flang] fixing alloca hoisting for blocks having single op. #96009
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-flang-codegen @llvm/pr-subscribers-flang-fir-hlfir Author: Vijay Kandiah (VijayKandiah) Changes: This change fixes issue #95977, which was caused by commit c0cba51 inserting allocas after the terminator op of the insertion block when that block contained only a single operation (its terminator). With this change, the hoisted constant-sized allocas are placed at the front of the insertion block, rather than right after the first operation in it. Full diff: https://github.com/llvm/llvm-project/pull/96009.diff 3 Files Affected:
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 4448224024f20..d57f2a75e3bd6 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -255,7 +255,7 @@ struct AllocaOpConversion : public fir::FIROpConversion<fir::AllocaOp> {
mlir::Region *parentRegion = rewriter.getInsertionBlock()->getParent();
mlir::Block *insertBlock =
getBlockForAllocaInsert(parentOp, parentRegion);
- size.getDefiningOp()->moveAfter(insertBlock, insertBlock->begin());
+ size.getDefiningOp()->moveBefore(&insertBlock->front());
rewriter.setInsertionPointAfter(size.getDefiningOp());
}
diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
index 45ff89bc40943..396fbaeacf39f 100644
--- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
+++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
@@ -26,6 +26,7 @@ func.func @_QPsb1(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref<!
// CHECK-LABEL: _QPsb1
// CHECK-SAME: %[[N_REF:.*]]: !llvm.ptr {fir.bindc_name = "n"}, %[[ARR_REF:.*]]: !llvm.ptr {fir.bindc_name = "arr"}) {
+// CHECK: %[[ONE_0:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[ONE_1:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[ONE_2:.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK: omp.parallel {
@@ -207,6 +208,7 @@ func.func @_QPsimd1(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref
// CHECK-LABEL: _QPsimd1
// CHECK-SAME: %[[N_REF:.*]]: !llvm.ptr {fir.bindc_name = "n"}, %[[ARR_REF:.*]]: !llvm.ptr {fir.bindc_name = "arr"}) {
+// CHECK: %[[ONE_0:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[ONE_1:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[ONE_2:.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK: omp.parallel {
@@ -281,7 +283,6 @@ func.func @_QPomp_target_data() {
}
// CHECK-LABEL: llvm.func @_QPomp_target_data() {
-// CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1024 : index) : i64
// CHECK: %[[VAL_1:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[VAL_2:.*]] = llvm.alloca %[[VAL_1]] x !llvm.array<1024 x i32> {bindc_name = "d"} : (i64) -> !llvm.ptr
// CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(1 : i64) : i64
@@ -290,6 +291,7 @@ func.func @_QPomp_target_data() {
// CHECK: %[[VAL_6:.*]] = llvm.alloca %[[VAL_5]] x !llvm.array<1024 x i32> {bindc_name = "b"} : (i64) -> !llvm.ptr
// CHECK: %[[VAL_7:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[VAL_8:.*]] = llvm.alloca %[[VAL_7]] x !llvm.array<1024 x i32> {bindc_name = "a"} : (i64) -> !llvm.ptr
+// CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1024 : index) : i64
// CHECK: %[[VAL_9:.*]] = llvm.mlir.constant(1024 : index) : i64
// CHECK: %[[VAL_10:.*]] = llvm.mlir.constant(1024 : index) : i64
// CHECK: %[[VAL_11:.*]] = llvm.mlir.constant(1024 : index) : i64
@@ -373,9 +375,9 @@ func.func @_QPopenmp_target_data_region() {
}
// CHECK-LABEL: llvm.func @_QPopenmp_target_data_region() {
-// CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[VAL_3:.*]] = llvm.alloca %[[VAL_2]] x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+// CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.array<1024 x i32> {bindc_name = "a"} : (i64) -> !llvm.ptr
// CHECK: %[[VAL_MAX:.*]] = llvm.mlir.constant(1024 : index) : i64
// CHECK: %[[VAL_ONE:.*]] = llvm.mlir.constant(1 : index) : i64
@@ -459,15 +461,15 @@ func.func @_QPomp_target() {
}
// CHECK-LABEL: llvm.func @_QPomp_target() {
-// CHECK: %[[EXTENT:.*]] = llvm.mlir.constant(512 : index) : i64
// CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.array<512 x i32> {bindc_name = "a"} : (i64) -> !llvm.ptr
+// CHECK: %[[EXTENT:.*]] = llvm.mlir.constant(512 : index) : i64
// CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(64 : i32) : i32
// CHECK: %[[STRIDE:.*]] = llvm.mlir.constant(1 : index) : i64
// CHECK: %[[LOWER:.*]] = llvm.mlir.constant(0 : index) : i64
// CHECK: %[[UPPER:.*]] = llvm.mlir.constant(511 : index) : i64
// CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound(%[[LOWER]] : i64) upper_bound(%[[UPPER]] : i64) extent(%[[EXTENT]] : i64) stride(%[[STRIDE]] : i64) start_idx(%[[STRIDE]] : i64)
-// CHECK: %[[MAP:.*]] = omp.map.info var_ptr(%2 : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !llvm.ptr {name = "a"}
+// CHECK: %[[MAP:.*]] = omp.map.info var_ptr(%[[VAL_1]] : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !llvm.ptr {name = "a"}
// CHECK: omp.target thread_limit(%[[VAL_2]] : i32) map_entries(%[[MAP]] -> %[[ARG_0:.*]] : !llvm.ptr) {
// CHECK: ^bb0(%[[ARG_0]]: !llvm.ptr):
// CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(10 : i32) : i32
@@ -715,7 +717,8 @@ func.func @_QPsb() {
// CHECK: }
// CHECK-LABEL: @_QPsimple_reduction
// CHECK-SAME: %[[ARRAY_REF:.*]]: !llvm.ptr
-// CHECK: %[[RED_ACCUMULATOR:.*]] = llvm.alloca %1 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr
+// CHECK: %[[VAL_1:.*]] = llvm.mlir.constant(1 : i64) : i64
+// CHECK: %[[RED_ACCUMULATOR:.*]] = llvm.alloca %[[VAL_1]] x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr
// CHECK: omp.parallel {
// CHECK: omp.wsloop reduction(@[[EQV_REDUCTION]] %[[RED_ACCUMULATOR]] -> %[[PRV:.+]] : !llvm.ptr) {
// CHECK-NEXT: omp.loop_nest
diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir
index d7059671d3a88..782dcba2aa628 100644
--- a/flang/test/Fir/convert-to-llvm.fir
+++ b/flang/test/Fir/convert-to-llvm.fir
@@ -2014,7 +2014,6 @@ func.func private @_QPtest_dt_callee(%arg0: !fir.box<!fir.array<?xi32>>)
// GENERIC: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
// AMDGPU: %[[AA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
// AMDGPU: %[[ALLOCA:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<5> to !llvm.ptr
-// CHECK: %[[C20:.*]] = llvm.mlir.constant(20 : index) : i64
// CHECK: %[[ALLOCA_SIZE_X:.*]] = llvm.mlir.constant(1 : i64) : i64
// GENERIC: %[[X:.*]] = llvm.alloca %[[ALLOCA_SIZE_X]] x !llvm.array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>> {bindc_name = "x"} : (i64) -> !llvm.ptr
// AMDGPU: %[[AC:.*]] = llvm.alloca %[[ALLOCA_SIZE_X]] x !llvm.array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>> {bindc_name = "x"} : (i64) -> !llvm.ptr<5>
@@ -2023,6 +2022,7 @@ func.func private @_QPtest_dt_callee(%arg0: !fir.box<!fir.array<?xi32>>)
// GENERIC: %[[V:.*]] = llvm.alloca %[[ALLOCA_SIZE_V]] x i32 {bindc_name = "v"} : (i64) -> !llvm.ptr
// AMDGPU: %[[AB:.*]] = llvm.alloca %[[ALLOCA_SIZE_V]] x i32 {bindc_name = "v"} : (i64) -> !llvm.ptr<5>
// AMDGPU: %[[V:.*]] = llvm.addrspacecast %[[AB]] : !llvm.ptr<5> to !llvm.ptr
+// CHECK: %[[C20:.*]] = llvm.mlir.constant(20 : index) : i64
// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[C10:.*]] = llvm.mlir.constant(10 : i64) : i64
// CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : i64) : i64
|
@llvm/pr-subscribers-flang-openmp Author: Vijay Kandiah (VijayKandiah) Changes: This change fixes issue #95977, which was caused by commit c0cba51 inserting allocas after the terminator op of the insertion block when that block contained only a single operation (its terminator). With this change, the hoisted constant-sized allocas are placed at the front of the insertion block, rather than right after the first operation in it. Full diff: https://github.com/llvm/llvm-project/pull/96009.diff 3 Files Affected:
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 4448224024f20..d57f2a75e3bd6 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -255,7 +255,7 @@ struct AllocaOpConversion : public fir::FIROpConversion<fir::AllocaOp> {
mlir::Region *parentRegion = rewriter.getInsertionBlock()->getParent();
mlir::Block *insertBlock =
getBlockForAllocaInsert(parentOp, parentRegion);
- size.getDefiningOp()->moveAfter(insertBlock, insertBlock->begin());
+ size.getDefiningOp()->moveBefore(&insertBlock->front());
rewriter.setInsertionPointAfter(size.getDefiningOp());
}
diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
index 45ff89bc40943..396fbaeacf39f 100644
--- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
+++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
@@ -26,6 +26,7 @@ func.func @_QPsb1(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref<!
// CHECK-LABEL: _QPsb1
// CHECK-SAME: %[[N_REF:.*]]: !llvm.ptr {fir.bindc_name = "n"}, %[[ARR_REF:.*]]: !llvm.ptr {fir.bindc_name = "arr"}) {
+// CHECK: %[[ONE_0:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[ONE_1:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[ONE_2:.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK: omp.parallel {
@@ -207,6 +208,7 @@ func.func @_QPsimd1(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref
// CHECK-LABEL: _QPsimd1
// CHECK-SAME: %[[N_REF:.*]]: !llvm.ptr {fir.bindc_name = "n"}, %[[ARR_REF:.*]]: !llvm.ptr {fir.bindc_name = "arr"}) {
+// CHECK: %[[ONE_0:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[ONE_1:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[ONE_2:.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK: omp.parallel {
@@ -281,7 +283,6 @@ func.func @_QPomp_target_data() {
}
// CHECK-LABEL: llvm.func @_QPomp_target_data() {
-// CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1024 : index) : i64
// CHECK: %[[VAL_1:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[VAL_2:.*]] = llvm.alloca %[[VAL_1]] x !llvm.array<1024 x i32> {bindc_name = "d"} : (i64) -> !llvm.ptr
// CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(1 : i64) : i64
@@ -290,6 +291,7 @@ func.func @_QPomp_target_data() {
// CHECK: %[[VAL_6:.*]] = llvm.alloca %[[VAL_5]] x !llvm.array<1024 x i32> {bindc_name = "b"} : (i64) -> !llvm.ptr
// CHECK: %[[VAL_7:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[VAL_8:.*]] = llvm.alloca %[[VAL_7]] x !llvm.array<1024 x i32> {bindc_name = "a"} : (i64) -> !llvm.ptr
+// CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1024 : index) : i64
// CHECK: %[[VAL_9:.*]] = llvm.mlir.constant(1024 : index) : i64
// CHECK: %[[VAL_10:.*]] = llvm.mlir.constant(1024 : index) : i64
// CHECK: %[[VAL_11:.*]] = llvm.mlir.constant(1024 : index) : i64
@@ -373,9 +375,9 @@ func.func @_QPopenmp_target_data_region() {
}
// CHECK-LABEL: llvm.func @_QPopenmp_target_data_region() {
-// CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[VAL_3:.*]] = llvm.alloca %[[VAL_2]] x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+// CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.array<1024 x i32> {bindc_name = "a"} : (i64) -> !llvm.ptr
// CHECK: %[[VAL_MAX:.*]] = llvm.mlir.constant(1024 : index) : i64
// CHECK: %[[VAL_ONE:.*]] = llvm.mlir.constant(1 : index) : i64
@@ -459,15 +461,15 @@ func.func @_QPomp_target() {
}
// CHECK-LABEL: llvm.func @_QPomp_target() {
-// CHECK: %[[EXTENT:.*]] = llvm.mlir.constant(512 : index) : i64
// CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.array<512 x i32> {bindc_name = "a"} : (i64) -> !llvm.ptr
+// CHECK: %[[EXTENT:.*]] = llvm.mlir.constant(512 : index) : i64
// CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(64 : i32) : i32
// CHECK: %[[STRIDE:.*]] = llvm.mlir.constant(1 : index) : i64
// CHECK: %[[LOWER:.*]] = llvm.mlir.constant(0 : index) : i64
// CHECK: %[[UPPER:.*]] = llvm.mlir.constant(511 : index) : i64
// CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound(%[[LOWER]] : i64) upper_bound(%[[UPPER]] : i64) extent(%[[EXTENT]] : i64) stride(%[[STRIDE]] : i64) start_idx(%[[STRIDE]] : i64)
-// CHECK: %[[MAP:.*]] = omp.map.info var_ptr(%2 : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !llvm.ptr {name = "a"}
+// CHECK: %[[MAP:.*]] = omp.map.info var_ptr(%[[VAL_1]] : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !llvm.ptr {name = "a"}
// CHECK: omp.target thread_limit(%[[VAL_2]] : i32) map_entries(%[[MAP]] -> %[[ARG_0:.*]] : !llvm.ptr) {
// CHECK: ^bb0(%[[ARG_0]]: !llvm.ptr):
// CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(10 : i32) : i32
@@ -715,7 +717,8 @@ func.func @_QPsb() {
// CHECK: }
// CHECK-LABEL: @_QPsimple_reduction
// CHECK-SAME: %[[ARRAY_REF:.*]]: !llvm.ptr
-// CHECK: %[[RED_ACCUMULATOR:.*]] = llvm.alloca %1 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr
+// CHECK: %[[VAL_1:.*]] = llvm.mlir.constant(1 : i64) : i64
+// CHECK: %[[RED_ACCUMULATOR:.*]] = llvm.alloca %[[VAL_1]] x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr
// CHECK: omp.parallel {
// CHECK: omp.wsloop reduction(@[[EQV_REDUCTION]] %[[RED_ACCUMULATOR]] -> %[[PRV:.+]] : !llvm.ptr) {
// CHECK-NEXT: omp.loop_nest
diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir
index d7059671d3a88..782dcba2aa628 100644
--- a/flang/test/Fir/convert-to-llvm.fir
+++ b/flang/test/Fir/convert-to-llvm.fir
@@ -2014,7 +2014,6 @@ func.func private @_QPtest_dt_callee(%arg0: !fir.box<!fir.array<?xi32>>)
// GENERIC: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
// AMDGPU: %[[AA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
// AMDGPU: %[[ALLOCA:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<5> to !llvm.ptr
-// CHECK: %[[C20:.*]] = llvm.mlir.constant(20 : index) : i64
// CHECK: %[[ALLOCA_SIZE_X:.*]] = llvm.mlir.constant(1 : i64) : i64
// GENERIC: %[[X:.*]] = llvm.alloca %[[ALLOCA_SIZE_X]] x !llvm.array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>> {bindc_name = "x"} : (i64) -> !llvm.ptr
// AMDGPU: %[[AC:.*]] = llvm.alloca %[[ALLOCA_SIZE_X]] x !llvm.array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>> {bindc_name = "x"} : (i64) -> !llvm.ptr<5>
@@ -2023,6 +2022,7 @@ func.func private @_QPtest_dt_callee(%arg0: !fir.box<!fir.array<?xi32>>)
// GENERIC: %[[V:.*]] = llvm.alloca %[[ALLOCA_SIZE_V]] x i32 {bindc_name = "v"} : (i64) -> !llvm.ptr
// AMDGPU: %[[AB:.*]] = llvm.alloca %[[ALLOCA_SIZE_V]] x i32 {bindc_name = "v"} : (i64) -> !llvm.ptr<5>
// AMDGPU: %[[V:.*]] = llvm.addrspacecast %[[AB]] : !llvm.ptr<5> to !llvm.ptr
+// CHECK: %[[C20:.*]] = llvm.mlir.constant(20 : index) : i64
// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[C10:.*]] = llvm.mlir.constant(10 : i64) : i64
// CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : i64) : i64
|
LGTM. I can confirm this fixes the problem. Thanks! |
This change fixes issue llvm#95977, which was caused by commit c0cba51 inserting allocas after the terminator op of the insertion block when that block contained only a single operation (its terminator). With this change, the hoisted constant-sized allocas are placed at the front of the insertion block, rather than right after the first operation in it.
This change fixes issue #95977, which was caused by commit c0cba51 inserting allocas after the terminator op of the insertion block when that block contained only a single operation (its terminator). With this change, the hoisted constant-sized allocas are placed at the front of the insertion block, rather than right after the first operation in it.