Skip to content

Commit c1548df

Browse files
fhahn, cjdb
authored and committed
[VPlan] Skip branches marked as dead in cost precomputation.
Don't consider the cost of branches marked to be skipped in VPlan cost pre-computation. Those aren't included in the legacy cost, so they should not be included in the VPlan cost.
1 parent f2fef0a commit c1548df

File tree

2 files changed

+155
-1
lines changed

2 files changed

+155
-1
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7215,9 +7215,11 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
72157215
// TODO: Compute cost of branches for each replicate region in the VPlan,
72167216
// which is more accurate than the legacy cost model.
72177217
for (BasicBlock *BB : OrigLoop->blocks()) {
7218-
if (BB == OrigLoop->getLoopLatch())
7218+
if (CostCtx.skipCostComputation(BB->getTerminator(), VF.isVector()))
72197219
continue;
72207220
CostCtx.SkipCostComputation.insert(BB->getTerminator());
7221+
if (BB == OrigLoop->getLoopLatch())
7222+
continue;
72217223
auto BranchCost = CostCtx.getLegacyCost(BB->getTerminator(), VF);
72227224
Cost += BranchCost;
72237225
}

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1697,6 +1697,154 @@ exit:
16971697
ret void
16981698
}
16991699

1700+
define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize {
1701+
; DEFAULT-LABEL: define void @redundant_branch_and_tail_folding(
1702+
; DEFAULT-SAME: ptr [[DST:%.*]], i1 [[C:%.*]]) #[[ATTR4:[0-9]+]] {
1703+
; DEFAULT-NEXT: entry:
1704+
; DEFAULT-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1705+
; DEFAULT: vector.ph:
1706+
; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
1707+
; DEFAULT: vector.body:
1708+
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
1709+
; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
1710+
; DEFAULT-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], <i64 20, i64 20, i64 20, i64 20>
1711+
; DEFAULT-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
1712+
; DEFAULT-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32>
1713+
; DEFAULT-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
1714+
; DEFAULT-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
1715+
; DEFAULT: pred.store.if:
1716+
; DEFAULT-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
1717+
; DEFAULT-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4
1718+
; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE]]
1719+
; DEFAULT: pred.store.continue:
1720+
; DEFAULT-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
1721+
; DEFAULT-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
1722+
; DEFAULT: pred.store.if1:
1723+
; DEFAULT-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1
1724+
; DEFAULT-NEXT: store i32 [[TMP6]], ptr [[DST]], align 4
1725+
; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE2]]
1726+
; DEFAULT: pred.store.continue2:
1727+
; DEFAULT-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
1728+
; DEFAULT-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
1729+
; DEFAULT: pred.store.if3:
1730+
; DEFAULT-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
1731+
; DEFAULT-NEXT: store i32 [[TMP8]], ptr [[DST]], align 4
1732+
; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE4]]
1733+
; DEFAULT: pred.store.continue4:
1734+
; DEFAULT-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
1735+
; DEFAULT-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
1736+
; DEFAULT: pred.store.if5:
1737+
; DEFAULT-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
1738+
; DEFAULT-NEXT: store i32 [[TMP10]], ptr [[DST]], align 4
1739+
; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE6]]
1740+
; DEFAULT: pred.store.continue6:
1741+
; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
1742+
; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
1743+
; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
1744+
; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
1745+
; DEFAULT: middle.block:
1746+
; DEFAULT-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
1747+
; DEFAULT: scalar.ph:
1748+
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 24, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1749+
; DEFAULT-NEXT: br label [[LOOP_HEADER:%.*]]
1750+
; DEFAULT: loop.header:
1751+
; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1752+
; DEFAULT-NEXT: br i1 [[C]], label [[LOOP_LATCH]], label [[THEN:%.*]]
1753+
; DEFAULT: then:
1754+
; DEFAULT-NEXT: br label [[LOOP_LATCH]]
1755+
; DEFAULT: loop.latch:
1756+
; DEFAULT-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1757+
; DEFAULT-NEXT: [[T:%.*]] = trunc nuw nsw i64 [[IV_NEXT]] to i32
1758+
; DEFAULT-NEXT: store i32 [[T]], ptr [[DST]], align 4
1759+
; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 21
1760+
; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP31:![0-9]+]]
1761+
; DEFAULT: exit:
1762+
; DEFAULT-NEXT: ret void
1763+
;
1764+
; PRED-LABEL: define void @redundant_branch_and_tail_folding(
1765+
; PRED-SAME: ptr [[DST:%.*]], i1 [[C:%.*]]) #[[ATTR4:[0-9]+]] {
1766+
; PRED-NEXT: entry:
1767+
; PRED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1768+
; PRED: vector.ph:
1769+
; PRED-NEXT: br label [[VECTOR_BODY:%.*]]
1770+
; PRED: vector.body:
1771+
; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
1772+
; PRED-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
1773+
; PRED-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], <i64 20, i64 20, i64 20, i64 20>
1774+
; PRED-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
1775+
; PRED-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32>
1776+
; PRED-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
1777+
; PRED-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
1778+
; PRED: pred.store.if:
1779+
; PRED-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
1780+
; PRED-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4
1781+
; PRED-NEXT: br label [[PRED_STORE_CONTINUE]]
1782+
; PRED: pred.store.continue:
1783+
; PRED-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
1784+
; PRED-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
1785+
; PRED: pred.store.if1:
1786+
; PRED-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1
1787+
; PRED-NEXT: store i32 [[TMP6]], ptr [[DST]], align 4
1788+
; PRED-NEXT: br label [[PRED_STORE_CONTINUE2]]
1789+
; PRED: pred.store.continue2:
1790+
; PRED-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
1791+
; PRED-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
1792+
; PRED: pred.store.if3:
1793+
; PRED-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
1794+
; PRED-NEXT: store i32 [[TMP8]], ptr [[DST]], align 4
1795+
; PRED-NEXT: br label [[PRED_STORE_CONTINUE4]]
1796+
; PRED: pred.store.continue4:
1797+
; PRED-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
1798+
; PRED-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
1799+
; PRED: pred.store.if5:
1800+
; PRED-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
1801+
; PRED-NEXT: store i32 [[TMP10]], ptr [[DST]], align 4
1802+
; PRED-NEXT: br label [[PRED_STORE_CONTINUE6]]
1803+
; PRED: pred.store.continue6:
1804+
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
1805+
; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
1806+
; PRED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
1807+
; PRED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
1808+
; PRED: middle.block:
1809+
; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
1810+
; PRED: scalar.ph:
1811+
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 24, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1812+
; PRED-NEXT: br label [[LOOP_HEADER:%.*]]
1813+
; PRED: loop.header:
1814+
; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1815+
; PRED-NEXT: br i1 [[C]], label [[LOOP_LATCH]], label [[THEN:%.*]]
1816+
; PRED: then:
1817+
; PRED-NEXT: br label [[LOOP_LATCH]]
1818+
; PRED: loop.latch:
1819+
; PRED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1820+
; PRED-NEXT: [[T:%.*]] = trunc nuw nsw i64 [[IV_NEXT]] to i32
1821+
; PRED-NEXT: store i32 [[T]], ptr [[DST]], align 4
1822+
; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 21
1823+
; PRED-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP28:![0-9]+]]
1824+
; PRED: exit:
1825+
; PRED-NEXT: ret void
1826+
;
1827+
entry:
1828+
br label %loop.header
1829+
1830+
loop.header:
1831+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1832+
br i1 %c, label %loop.latch, label %then
1833+
1834+
then:
1835+
br label %loop.latch
1836+
1837+
loop.latch:
1838+
%iv.next = add nuw nsw i64 %iv, 1
1839+
%t = trunc nuw nsw i64 %iv.next to i32
1840+
store i32 %t, ptr %dst, align 4
1841+
%ec = icmp eq i64 %iv.next, 21
1842+
br i1 %ec, label %exit, label %loop.header
1843+
1844+
exit:
1845+
ret void
1846+
}
1847+
17001848
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
17011849
declare float @llvm.fmuladd.f32(float, float, float) #1
17021850

@@ -1734,6 +1882,8 @@ attributes #2 = { vscale_range(2,2) "target-cpu"="neoverse-512tvb" }
17341882
; DEFAULT: [[LOOP27]] = distinct !{[[LOOP27]], [[META2]], [[META1]]}
17351883
; DEFAULT: [[LOOP28]] = distinct !{[[LOOP28]], [[META1]], [[META2]]}
17361884
; DEFAULT: [[LOOP29]] = distinct !{[[LOOP29]], [[META1]]}
1885+
; DEFAULT: [[LOOP30]] = distinct !{[[LOOP30]], [[META1]], [[META2]]}
1886+
; DEFAULT: [[LOOP31]] = distinct !{[[LOOP31]], [[META2]], [[META1]]}
17371887
;.
17381888
; PRED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
17391889
; PRED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -1762,4 +1912,6 @@ attributes #2 = { vscale_range(2,2) "target-cpu"="neoverse-512tvb" }
17621912
; PRED: [[LOOP24]] = distinct !{[[LOOP24]], [[META2]], [[META1]]}
17631913
; PRED: [[LOOP25]] = distinct !{[[LOOP25]], [[META1]], [[META2]]}
17641914
; PRED: [[LOOP26]] = distinct !{[[LOOP26]], [[META1]]}
1915+
; PRED: [[LOOP27]] = distinct !{[[LOOP27]], [[META1]], [[META2]]}
1916+
; PRED: [[LOOP28]] = distinct !{[[LOOP28]], [[META2]], [[META1]]}
17651917
;.

0 commit comments

Comments
 (0)