Skip to content

Commit 26a0d53

Browse files
committed
[CHR] Skip region containing llvm.coro.id
When a block containing llvm.coro.id is cloned during CHR, an invalid PHI node of token type is inserted at the beginning of the block containing llvm.coro.begin. To avoid this, we exclude regions that contain llvm.coro.id. Reviewed By: ChuanqiXu Differential Revision: https://reviews.llvm.org/D124418
1 parent ccd047c commit 26a0d53

File tree

2 files changed

+129
-1
lines changed

2 files changed

+129
-1
lines changed

llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "llvm/IR/CFG.h"
2727
#include "llvm/IR/Dominators.h"
2828
#include "llvm/IR/IRBuilder.h"
29+
#include "llvm/IR/IntrinsicInst.h"
2930
#include "llvm/IR/MDBuilder.h"
3031
#include "llvm/IR/PassManager.h"
3132
#include "llvm/InitializePasses.h"
@@ -769,9 +770,21 @@ CHRScope * CHR::findScope(Region *R) {
769770
return nullptr;
770771
// If any of the basic blocks have address taken, we must skip this region
771772
// because we cannot clone basic blocks that have address taken.
772-
for (BasicBlock *BB : R->blocks())
773+
for (BasicBlock *BB : R->blocks()) {
773774
if (BB->hasAddressTaken())
774775
return nullptr;
776+
// If we encounter llvm.coro.id, skip this region because if the basic block
777+
// is cloned, we end up inserting a token type PHI node into the block with
778+
// llvm.coro.begin.
779+
// FIXME: This could lead to less optimal codegen: because the region is
780+
// excluded, it can prevent CHR from merging adjacent regions into a bigger
781+
// scope and hoisting more branches.
782+
for (Instruction &I : *BB)
783+
if (auto *II = dyn_cast<IntrinsicInst>(&I))
784+
if (II->getIntrinsicID() == Intrinsic::coro_id)
785+
return nullptr;
786+
}
787+
775788
if (Exit) {
776789
// Try to find an if-then block (check if R is an if-then).
777790
// if (cond) {
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
;RUN: opt < %s -passes='require<profile-summary>,function(chr,instcombine,simplifycfg)' -S | FileCheck %s
3+
4+
declare void @foo()
5+
declare void @bar()
6+
7+
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
8+
declare i1 @llvm.coro.alloc(token)
9+
declare i8* @llvm.coro.begin(token, i8*)
10+
declare noalias i8* @malloc(i32)
11+
12+
%f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1 }
13+
14+
; resume part of the coroutine
15+
define fastcc void @f.resume(%f.Frame* noalias nonnull align 8 dereferenceable(24) %FramePtr) {
16+
tail call void @bar()
17+
ret void
18+
}
19+
20+
; destroy part of the coroutine
21+
define fastcc void @f.destroy(%f.Frame* noalias nonnull align 8 dereferenceable(24) %FramePtr) {
22+
tail call void @bar()
23+
ret void
24+
}
25+
26+
; cleanup part of the coroutine
27+
define fastcc void @f.cleanup(%f.Frame* noalias nonnull align 8 dereferenceable(24) %FramePtr) {
28+
tail call void @bar()
29+
ret void
30+
}
31+
32+
@f.resumers = private constant [3 x void (%f.Frame*)*] [void (%f.Frame*)* @f.resume, void (%f.Frame*)* @f.destroy, void (%f.Frame*)* @f.cleanup]
33+
34+
; Test that CHR skips a region containing llvm.coro.id.
35+
define i8* @test_chr_with_coro_id(i32* %i) !prof !14 {
36+
; CHECK-LABEL: @test_chr_with_coro_id(
37+
; CHECK-NEXT: entry:
38+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
39+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
40+
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
41+
; CHECK-NEXT: br i1 [[TMP2]], label %[[BB0:.*]], label %[[ENTRY_SPLIT_NONCHR:.*]], !prof !15
42+
; CHECK: [[BB0]]:
43+
; CHECK-NEXT: call void @foo()
44+
; CHECK-NEXT: br label %[[BB_CORO_ID:.*]]
45+
; CHECK: bb1.nonchr:
46+
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP0]], 2
47+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
48+
; CHECK-NEXT: br i1 [[TMP5]], label %[[BB2_NONCHR:.*]], label %[[BB_CORO_ID]], !prof !16
49+
; CHECK: [[BB2_NONCHR]]:
50+
; CHECK-NEXT: call void @foo()
51+
; CHECK-NEXT: br label %[[BB_CORO_ID]]
52+
; CHECK: [[BB_CORO_ID]]:
53+
; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id
54+
; CHECK-NEXT: [[NEED_DYN_ALLOC:%.*]] = call i1 @llvm.coro.alloc(token [[ID]])
55+
; CHECK-NEXT: br i1 [[NEED_DYN_ALLOC]], label %[[BB_CORO_DYN_ALLOC:.*]], label %[[BB_CORO_BEGIN:.*]]
56+
; CHECK: [[BB_CORO_BEGIN]]:
57+
; CHECK-NEXT: [[PHI:%.*]] = phi i8* [ null, %[[BB_CORO_ID]] ], [ %alloc, %[[BB_CORO_DYN_ALLOC]] ]
58+
; CHECK-NEXT: [[HDL:%.*]] = call noalias nonnull i8* @llvm.coro.begin(token [[ID]], i8* [[PHI]])
59+
;
60+
entry:
61+
%0 = load i32, i32* %i
62+
%1 = and i32 %0, 1
63+
%2 = icmp eq i32 %1, 0
64+
br i1 %2, label %bb1, label %bb0, !prof !15
65+
66+
bb0:
67+
call void @foo()
68+
br label %bb1
69+
70+
bb1:
71+
%3 = and i32 %0, 2
72+
%4 = icmp eq i32 %3, 0
73+
br i1 %4, label %bb2, label %bb.coro.id, !prof !15
74+
75+
bb2:
76+
call void @foo()
77+
br label %bb.coro.id
78+
79+
bb.coro.id:
80+
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* bitcast ([3 x void (%f.Frame*)*]* @f.resumers to i8*))
81+
%need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
82+
br i1 %need.dyn.alloc, label %bb.coro.dyn.alloc, label %bb.coro.begin
83+
84+
bb.coro.dyn.alloc:
85+
%alloc = call i8* @malloc(i32 24)
86+
br label %bb.coro.begin
87+
88+
bb.coro.begin:
89+
%phi = phi i8* [ null, %bb.coro.id ], [ %alloc, %bb.coro.dyn.alloc ]
90+
%hdl = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %phi)
91+
ret i8* %hdl
92+
}
93+
94+
!llvm.module.flags = !{!0}
95+
!0 = !{i32 1, !"ProfileSummary", !1}
96+
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
97+
!2 = !{!"ProfileFormat", !"InstrProf"}
98+
!3 = !{!"TotalCount", i64 10000}
99+
!4 = !{!"MaxCount", i64 10}
100+
!5 = !{!"MaxInternalCount", i64 1}
101+
!6 = !{!"MaxFunctionCount", i64 1000}
102+
!7 = !{!"NumCounts", i64 3}
103+
!8 = !{!"NumFunctions", i64 3}
104+
!9 = !{!"DetailedSummary", !10}
105+
!10 = !{!11, !12, !13}
106+
!11 = !{i32 10000, i64 100, i32 1}
107+
!12 = !{i32 999000, i64 100, i32 1}
108+
!13 = !{i32 999999, i64 1, i32 2}
109+
110+
!14 = !{!"function_entry_count", i64 100}
111+
!15 = !{!"branch_weights", i32 0, i32 1}
112+
!16 = !{!"branch_weights", i32 1, i32 1}
113+
!17 = !{!"branch_weights", i32 0, i32 0}
114+
; CHECK: !15 = !{!"branch_weights", i32 1000, i32 0}
115+
; CHECK: !16 = !{!"branch_weights", i32 0, i32 1}

0 commit comments

Comments
 (0)