Skip to content

Commit 5793dc6

Browse files
committed
[CodeGenPGO] Ensure that PGO correctly instruments cilk_for loops with atomic instrumentation. Fixes llvm#30
1 parent f58199d commit 5793dc6

File tree

2 files changed

+77
-0
lines changed

2 files changed

+77
-0
lines changed

clang/lib/CodeGen/CodeGenPGO.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,9 @@ class PGOHash {
125125
BinaryOperatorNE,
126126
// The preceding values are available since PGO_HASH_V2.
127127

128+
// Cilk statements. These values are also available with PGO_HASH_V1.
129+
CilkForStmt,
130+
128131
// Keep this last. It's for the static assert that follows.
129132
LastHashType
130133
};
@@ -266,6 +269,7 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
266269
DEFINE_NESTABLE_TRAVERSAL(ObjCForCollectionStmt)
267270
DEFINE_NESTABLE_TRAVERSAL(CXXTryStmt)
268271
DEFINE_NESTABLE_TRAVERSAL(CXXCatchStmt)
272+
DEFINE_NESTABLE_TRAVERSAL(CilkForStmt)
269273

270274
/// Get version \p HashVersion of the PGO hash for \p S.
271275
PGOHash::HashType getHashType(PGOHashVersion HashVersion, const Stmt *S) {
@@ -326,6 +330,8 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
326330
}
327331
break;
328332
}
333+
case Stmt::CilkForStmtClass:
334+
return PGOHash::CilkForStmt;
329335
}
330336

331337
if (HashVersion >= PGO_HASH_V2) {
@@ -743,6 +749,53 @@ struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
743749
setCount(ParentCount + RHSCount - CurrentCount);
744750
RecordNextStmtCount = true;
745751
}
752+
753+
void VisitCilkForStmt(const CilkForStmt *S) {
754+
RecordStmtCount(S);
755+
if (S->getInit())
756+
Visit(S->getInit());
757+
if (S->getLimitStmt())
758+
Visit(S->getLimitStmt());
759+
if (S->getBeginStmt())
760+
Visit(S->getBeginStmt());
761+
if (S->getEndStmt())
762+
Visit(S->getEndStmt());
763+
if (S->getLoopVarDecl())
764+
Visit(S->getLoopVarDecl());
765+
766+
uint64_t ParentCount = CurrentCount;
767+
768+
BreakContinueStack.push_back(BreakContinue());
769+
// Visit the body region first. (This is basically the same as a while
770+
// loop; see further comments in VisitWhileStmt.)
771+
uint64_t BodyCount = setCount(PGO.getRegionCount(S));
772+
CountMap[S->getBody()] = BodyCount;
773+
Visit(S->getBody());
774+
uint64_t BackedgeCount = CurrentCount;
775+
BreakContinue BC = BreakContinueStack.pop_back_val();
776+
777+
// The increment is essentially part of the body but it needs to include
778+
// the count for all the continue statements.
779+
if (S->getInc()) {
780+
uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
781+
CountMap[S->getInc()] = IncCount;
782+
Visit(S->getInc());
783+
}
784+
785+
// ...then go back and propagate counts through the condition.
786+
uint64_t CondCount =
787+
setCount(ParentCount + BackedgeCount + BC.ContinueCount);
788+
if (S->getInitCond()) {
789+
CountMap[S->getInitCond()] = ParentCount;
790+
Visit(S->getInitCond());
791+
}
792+
if (S->getCond()) {
793+
CountMap[S->getCond()] = CondCount;
794+
Visit(S->getCond());
795+
}
796+
setCount(BC.BreakCount + CondCount - BodyCount);
797+
RecordNextStmtCount = true;
798+
}
746799
};
747800
} // end anonymous namespace
748801

clang/test/Cilk/cilkfor-pgo.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Check that -fprofile-instrument generates atomic
2+
// instrumentation instructions inside of _Cilk_for loops.
3+
//
4+
// Credit to Brian Wheatman for the original source of this test.
5+
//
6+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fprofile-instrument=clang -fprofile-update=atomic %s -S -emit-llvm -fopencilk -ftapir=none -o - 2>&1 | FileCheck %s
7+
// expected-no-diagnostics
8+
9+
int main() {
10+
int sum = 0;
11+
_Cilk_for(int i = 0; i < 1000000; i++) { sum += i; }
12+
13+
return sum;
14+
}
15+
16+
// CHECK: @__profc_main = {{.*}}global [2 x i64] zeroinitializer, section "__llvm_prf_cnts"
17+
18+
// CHECK-LABEL: define {{.*}}i32 @main()
19+
20+
// CHECK: detach within %{{.+}}, label %[[PFOR_BODY:.+]], label %[[PFOR_INC:.+]]
21+
22+
// CHECK: [[PFOR_BODY]]:
23+
// CHECK: atomicrmw add i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__profc_main, i64 0, i64 1), i64 1 monotonic
24+
// CHECK: reattach within %{{.+}}, label %[[PFOR_INC]]

0 commit comments

Comments
 (0)