Skip to content

Commit af5a45b

Browse files
authored
[X86,SimplifyCFG] Use passthru to reduce select (llvm#108754)
1 parent 87d56c5 commit af5a45b

File tree

2 files changed: 75 additions and 10 deletions.

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3040,7 +3040,7 @@ static bool isSafeCheapLoadStore(const Instruction *I,
30403040
/// %sub = sub %x, %y
30413041
/// br label BB2
30423042
/// EndBB:
3043-
/// %phi = phi [ %sub, %ThenBB ], [ 0, %EndBB ]
3043+
/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
30443044
/// ...
30453045
/// \endcode
30463046
///
@@ -3338,9 +3338,20 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
33383338
if (auto *LI = dyn_cast<LoadInst>(I)) {
33393339
// Handle Load.
33403340
auto *Ty = I->getType();
3341-
MaskedLoadStore = Builder.CreateMaskedLoad(FixedVectorType::get(Ty, 1),
3342-
Op0, LI->getAlign(), Mask);
3343-
I->replaceAllUsesWith(Builder.CreateBitCast(MaskedLoadStore, Ty));
3341+
PHINode *PN = nullptr;
3342+
Value *PassThru = nullptr;
3343+
for (User *U : I->users())
3344+
if ((PN = dyn_cast<PHINode>(U))) {
3345+
PassThru = Builder.CreateBitCast(PN->getIncomingValueForBlock(BB),
3346+
FixedVectorType::get(Ty, 1));
3347+
break;
3348+
}
3349+
MaskedLoadStore = Builder.CreateMaskedLoad(
3350+
FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
3351+
Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
3352+
if (PN)
3353+
PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
3354+
I->replaceAllUsesWith(NewLoadStore);
33443355
} else {
33453356
// Handle Store.
33463357
auto *StoredVal =

llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll

Lines changed: 60 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,9 @@ define i32 @succ1to0_phi(ptr %p) {
7272
; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
7373
; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
7474
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
75-
; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
75+
; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
7676
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
77-
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i32 0, i32 [[TMP3]]
78-
; CHECK-NEXT: ret i32 [[SPEC_SELECT]]
77+
; CHECK-NEXT: ret i32 [[TMP3]]
7978
;
8079
entry:
8180
%cond = icmp eq ptr %p, null
@@ -184,10 +183,9 @@ define i32 @load_from_gep(ptr %p) {
184183
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
185184
; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
186185
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
187-
; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[ARRAYIDX]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
186+
; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[ARRAYIDX]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
188187
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
189-
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i32 0, i32 [[TMP3]]
190-
; CHECK-NEXT: ret i32 [[SPEC_SELECT]]
188+
; CHECK-NEXT: ret i32 [[TMP3]]
191189
;
192190
entry:
193191
%cond = icmp eq ptr %p, null
@@ -674,6 +672,62 @@ if.false:
674672
ret void
675673
}
676674

675+
define i32 @str_transcode0(i1 %cond1, ptr %p, i1 %cond2) {
676+
; CHECK-LABEL: @str_transcode0(
677+
; CHECK-NEXT: entry:
678+
; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BB3:%.*]], label [[BB1:%.*]]
679+
; CHECK: bb1:
680+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND2:%.*]] to <1 x i1>
681+
; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[P:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> zeroinitializer)
682+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[TMP1]] to i64
683+
; CHECK-NEXT: br label [[BB3]]
684+
; CHECK: bb3:
685+
; CHECK-NEXT: [[Y:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB1]] ]
686+
; CHECK-NEXT: store i64 [[Y]], ptr [[P]], align 8
687+
; CHECK-NEXT: ret i32 0
688+
;
689+
entry:
690+
br i1 %cond1, label %bb3, label %bb1
691+
692+
bb1: ; preds = %entry
693+
br i1 %cond2, label %bb2, label %bb3
694+
695+
bb2: ; preds = %bb1
696+
%x = load i64, ptr %p, align 8
697+
br label %bb3
698+
699+
bb3: ; preds = %bb2, %bb1, %entry
700+
%y = phi i64 [ %x, %bb2 ], [ 0, %bb1 ], [ 0, %entry ]
701+
store i64 %y, ptr %p, align 8
702+
ret i32 0
703+
}
704+
705+
define i32 @succ1to0_phi2(ptr %p, ptr %p2) {
706+
; CHECK-LABEL: @succ1to0_phi2(
707+
; CHECK-NEXT: entry:
708+
; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
709+
; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
710+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
711+
; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
712+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
713+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
714+
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP4]], ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP1]])
715+
; CHECK-NEXT: ret i32 [[TMP3]]
716+
;
717+
entry:
718+
%cond = icmp eq ptr %p, null
719+
br i1 %cond, label %if.true, label %if.false
720+
721+
if.false:
722+
%0 = load i32, ptr %p
723+
store i32 %0, ptr %p2
724+
br label %if.true
725+
726+
if.true:
727+
%res = phi i32 [ %0, %if.false ], [ 0, %entry ]
728+
ret i32 %res
729+
}
730+
677731
declare i32 @read_memory_only() readonly nounwind willreturn speculatable
678732

679733
!llvm.dbg.cu = !{!0}

0 commit comments

Comments
 (0)