Commit 014bf63
AMDGPU/GlobalISel: Temporal divergence lowering i1 (#124299)
A use of an i1 outside of the cycle, whether uniform or divergent, is a lane mask (in an SGPR) that holds the i1 value from the iteration in which each lane exited the cycle. Create a phi that merges the lane mask across all iterations.
1 parent c07e1e3 commit 014bf63

9 files changed: +747, -506 lines
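For intuition (an illustrative sketch, not part of the commit): at the end of every iteration the pass merges the lane mask so that lanes that already left the cycle keep their previously merged bit, while still-active lanes take the current i1 value. After the final iteration each lane therefore holds the i1 from the iteration in which it exited. A minimal C++ simulation of that recurrence, with a hypothetical 4-lane wave and made-up exit iterations:

#include <cstdint>
#include <cstdio>

int main() {
  // Illustrative 4-lane wave; bit i of Exec is set while lane i is in the loop.
  const int NumLanes = 4;
  int ExitIter[NumLanes] = {1, 3, 2, 4}; // iteration at which each lane exits
  uint32_t Exec = 0xF;                   // all lanes active on entry
  uint32_t MergedMask = 0;               // conceptually IMPLICIT_DEF on entry

  for (int Iter = 1; Exec != 0; ++Iter) {
    // The divergent i1 computed this iteration; here: "is this my exit iteration?"
    uint32_t Val = 0;
    for (int L = 0; L < NumLanes; ++L)
      if (Iter == ExitIter[L])
        Val |= 1u << L;

    // The merge built at the end of each iteration:
    //   MergedMask = (MergedMask & ~Exec) | (Val & Exec)
    MergedMask = (MergedMask & ~Exec) | (Val & Exec);

    // Lanes whose exit condition is set leave the loop.
    Exec &= ~Val;
  }

  // Every lane kept the i1 value from its own exit iteration (0xF here,
  // since each lane's exit condition was true when it left).
  printf("merged mask: 0x%X\n", MergedMask);
  return 0;
}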

llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp

Lines changed: 64 additions & 0 deletions
@@ -81,6 +81,7 @@ class DivergenceLoweringHelper : public PhiLoweringHelper {
   void constrainAsLaneMask(Incoming &In) override;
 
   bool lowerTemporalDivergence();
+  bool lowerTemporalDivergenceI1();
 };
 
 DivergenceLoweringHelper::DivergenceLoweringHelper(
@@ -228,6 +229,63 @@ bool DivergenceLoweringHelper::lowerTemporalDivergence() {
   return false;
 }
 
+bool DivergenceLoweringHelper::lowerTemporalDivergenceI1() {
+  MachineRegisterInfo::VRegAttrs BoolS1 = {ST->getBoolRC(), LLT::scalar(1)};
+  initializeLaneMaskRegisterAttributes(BoolS1);
+  MachineSSAUpdater SSAUpdater(*MF);
+
+  // In case of a use outside multiple nested cycles, or of multiple uses, we
+  // only need to merge the lane mask across the largest relevant cycle.
+  SmallDenseMap<Register, std::pair<const MachineCycle *, Register>> LRCCache;
+  for (auto [Reg, UseInst, LRC] : MUI->getTemporalDivergenceList()) {
+    if (MRI->getType(Reg) != LLT::scalar(1))
+      continue;
+
+    auto [LRCCacheIter, RegNotCached] = LRCCache.try_emplace(Reg);
+    auto &CycleMergedMask = LRCCacheIter->getSecond();
+    const MachineCycle *&CachedLRC = CycleMergedMask.first;
+    if (RegNotCached || LRC->contains(CachedLRC)) {
+      CachedLRC = LRC;
+    }
+  }
+
+  for (auto &LRCCacheEntry : LRCCache) {
+    Register Reg = LRCCacheEntry.first;
+    auto &CycleMergedMask = LRCCacheEntry.getSecond();
+    const MachineCycle *Cycle = CycleMergedMask.first;
+
+    Register MergedMask = MRI->createVirtualRegister(BoolS1);
+    SSAUpdater.Initialize(MergedMask);
+
+    MachineBasicBlock *MBB = MRI->getVRegDef(Reg)->getParent();
+    SSAUpdater.AddAvailableValue(MBB, MergedMask);
+
+    for (auto Entry : Cycle->getEntries()) {
+      for (MachineBasicBlock *Pred : Entry->predecessors()) {
+        if (!Cycle->contains(Pred)) {
+          B.setInsertPt(*Pred, Pred->getFirstTerminator());
+          auto ImplDef = B.buildInstr(AMDGPU::IMPLICIT_DEF, {BoolS1}, {});
+          SSAUpdater.AddAvailableValue(Pred, ImplDef.getReg(0));
+        }
+      }
+    }
+
+    buildMergeLaneMasks(*MBB, MBB->getFirstTerminator(), {}, MergedMask,
+                        SSAUpdater.GetValueInMiddleOfBlock(MBB), Reg);
+
+    CycleMergedMask.second = MergedMask;
+  }
+
+  for (auto [Reg, UseInst, Cycle] : MUI->getTemporalDivergenceList()) {
+    if (MRI->getType(Reg) != LLT::scalar(1))
+      continue;
+
+    replaceUsesOfRegInInstWith(Reg, UseInst, LRCCache.lookup(Reg).second);
+  }
+
+  return false;
+}
+
 } // End anonymous namespace.
 
 INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
@@ -267,6 +325,12 @@ bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
 
   // Non-i1 temporal divergence lowering.
   Changed |= Helper.lowerTemporalDivergence();
+  // This covers both uniform and divergent i1s. Lane masks are in sgprs and
+  // need to be updated in each iteration.
+  Changed |= Helper.lowerTemporalDivergenceI1();
+  // Temporal divergence lowering of divergent i1 phis used outside of the cycle
+  // could also be handled by lowerPhis, but we do it in lowerTemporalDivergenceI1
+  // since in some cases lowerPhis does unnecessary lane mask merging.
   Changed |= Helper.lowerPhis();
   return Changed;
 }
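As an aside on the caching loop above (a standalone sketch under simplified assumptions, not the pass code): when a register is used outside several nested cycles, or has several such uses, only the largest relevant cycle needs a merged mask. The hypothetical Cycle type below stands in for MachineCycle, and std::map for SmallDenseMap; the selection mirrors the try_emplace loop in lowerTemporalDivergenceI1().

#include <map>
#include <set>
#include <utility>
#include <vector>

using Register = unsigned;

// Stand-in for MachineCycle: a cycle contains itself and its nested cycles.
struct Cycle {
  std::set<const Cycle *> Nested; // transitively nested cycles
  bool contains(const Cycle *C) const {
    return C == this || Nested.count(C) != 0;
  }
};

// Keep only the largest (outermost) relevant cycle per register.
std::map<Register, const Cycle *> pickLargestRelevantCycle(
    const std::vector<std::pair<Register, const Cycle *>> &TDList) {
  std::map<Register, const Cycle *> Cache;
  for (auto [Reg, LRC] : TDList) {
    auto [It, RegNotCached] = Cache.try_emplace(Reg, nullptr);
    // Replace the cached cycle whenever the new one encloses it.
    if (RegNotCached || LRC->contains(It->second))
      It->second = LRC;
  }
  return Cache;
}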

llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll

Lines changed: 20 additions & 10 deletions
@@ -104,20 +104,25 @@ define void @divergent_i1_phi_used_inside_loop(float %val, ptr %addr) {
 ; GFX10-NEXT:    s_mov_b32 s4, 0
 ; GFX10-NEXT:    s_mov_b32 s5, 1
 ; GFX10-NEXT:    s_mov_b32 s6, 0
+; GFX10-NEXT:    ; implicit-def: $sgpr7
 ; GFX10-NEXT:  .LBB2_1: ; %loop
 ; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX10-NEXT:    v_cvt_f32_u32_e32 v3, s6
 ; GFX10-NEXT:    s_xor_b32 s5, s5, 1
-; GFX10-NEXT:    s_add_i32 s6, s6, 1
+; GFX10-NEXT:    s_and_b32 s8, s5, 1
+; GFX10-NEXT:    s_cmp_lg_u32 s8, 0
 ; GFX10-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v3, v0
+; GFX10-NEXT:    s_cselect_b32 s8, exec_lo, 0
+; GFX10-NEXT:    s_add_i32 s6, s6, 1
 ; GFX10-NEXT:    s_or_b32 s4, vcc_lo, s4
+; GFX10-NEXT:    s_andn2_b32 s7, s7, exec_lo
+; GFX10-NEXT:    s_and_b32 s8, exec_lo, s8
+; GFX10-NEXT:    s_or_b32 s7, s7, s8
 ; GFX10-NEXT:    s_andn2_b32 exec_lo, exec_lo, s4
 ; GFX10-NEXT:    s_cbranch_execnz .LBB2_1
 ; GFX10-NEXT:  ; %bb.2: ; %exit
 ; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
-; GFX10-NEXT:    s_cmp_lg_u32 s5, 0
-; GFX10-NEXT:    s_cselect_b32 s4, exec_lo, 0
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, s7
 ; GFX10-NEXT:    flat_store_dword v[1:2], v0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -147,37 +152,42 @@ define void @divergent_i1_phi_used_inside_loop_bigger_loop_body(float %val, float
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0x3e8
 ; GFX10-NEXT:    s_mov_b32 s5, 0
 ; GFX10-NEXT:    s_mov_b32 s6, 0
+; GFX10-NEXT:    ; implicit-def: $sgpr7
 ; GFX10-NEXT:    s_branch .LBB3_2
 ; GFX10-NEXT:  .LBB3_1: ; %loop_body
 ; GFX10-NEXT:    ; in Loop: Header=BB3_2 Depth=1
 ; GFX10-NEXT:    v_cvt_f32_u32_e32 v8, s6
-; GFX10-NEXT:    s_xor_b32 s4, s4, exec_lo
+; GFX10-NEXT:    s_mov_b32 s8, exec_lo
 ; GFX10-NEXT:    s_add_i32 s6, s6, 1
+; GFX10-NEXT:    s_xor_b32 s4, s4, s8
 ; GFX10-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v8, v0
 ; GFX10-NEXT:    s_or_b32 s5, vcc_lo, s5
+; GFX10-NEXT:    s_andn2_b32 s7, s7, exec_lo
+; GFX10-NEXT:    s_and_b32 s8, exec_lo, s4
+; GFX10-NEXT:    s_or_b32 s7, s7, s8
 ; GFX10-NEXT:    s_andn2_b32 exec_lo, exec_lo, s5
 ; GFX10-NEXT:    s_cbranch_execz .LBB3_6
 ; GFX10-NEXT:  .LBB3_2: ; %loop_start
 ; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX10-NEXT:    s_cmpk_le_i32 s6, 0x3e8
-; GFX10-NEXT:    s_mov_b32 s7, 1
+; GFX10-NEXT:    s_mov_b32 s8, 1
 ; GFX10-NEXT:    s_cbranch_scc0 .LBB3_4
 ; GFX10-NEXT:  ; %bb.3: ; %else
 ; GFX10-NEXT:    ; in Loop: Header=BB3_2 Depth=1
-; GFX10-NEXT:    s_mov_b32 s7, 0
+; GFX10-NEXT:    s_mov_b32 s8, 0
 ; GFX10-NEXT:    flat_store_dword v[6:7], v1
 ; GFX10-NEXT:  .LBB3_4: ; %Flow
 ; GFX10-NEXT:    ; in Loop: Header=BB3_2 Depth=1
-; GFX10-NEXT:    s_xor_b32 s7, s7, 1
-; GFX10-NEXT:    s_cmp_lg_u32 s7, 0
+; GFX10-NEXT:    s_xor_b32 s8, s8, 1
+; GFX10-NEXT:    s_cmp_lg_u32 s8, 0
 ; GFX10-NEXT:    s_cbranch_scc1 .LBB3_1
 ; GFX10-NEXT:  ; %bb.5: ; %if
 ; GFX10-NEXT:    ; in Loop: Header=BB3_2 Depth=1
 ; GFX10-NEXT:    flat_store_dword v[4:5], v1
 ; GFX10-NEXT:    s_branch .LBB3_1
 ; GFX10-NEXT:  .LBB3_6: ; %exit
 ; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, s7
 ; GFX10-NEXT:    flat_store_dword v[2:3], v0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]

llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir

Lines changed: 33 additions & 19 deletions
@@ -201,28 +201,35 @@ body: |
 ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
 ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: bb.1:
 ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
 ; GFX10-NEXT: {{ $}}
-; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
-; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
-; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C1]](s1), %bb.0, %11(s1), %bb.1
+; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %19(s1), %bb.1
+; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
+; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
+; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C1]](s1), %bb.0, %11(s1), %bb.1
+; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
 ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
-; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
-; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
+; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
+; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
+; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
 ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
 ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
-; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
+; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
+; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
+; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc
+; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
+; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
 ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX10-NEXT: G_BR %bb.2
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: bb.2:
 ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
 ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
 ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
-; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s1), [[C5]], [[C4]]
+; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[S_OR_B32_]](s1), [[C5]], [[C4]]
 ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
 ; GFX10-NEXT: SI_RETURN
 bb.0:
@@ -285,27 +292,30 @@ body: |
 ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
 ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C]]
 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
 ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1)
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: bb.1:
 ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX10-NEXT: {{ $}}
-; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.0, %37(s1), %bb.5
-; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C1]](s32), %bb.0
-; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %17(s32), %bb.5
+; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.0, %44(s1), %bb.5
+; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %36(s1), %bb.5
+; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C1]](s32), %bb.0
+; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %17(s32), %bb.5
 ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
 ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000
-; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI2]](s32), [[C2]]
+; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI3]](s32), [[C2]]
 ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
 ; GFX10-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
 ; GFX10-NEXT: G_BR %bb.2
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: bb.2:
 ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
 ; GFX10-NEXT: {{ $}}
-; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI %24(s1), %bb.4, [[C3]](s1), %bb.1
+; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI %24(s1), %bb.4, [[C3]](s1), %bb.1
 ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
-; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C4]]
+; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI4]], [[C4]]
 ; GFX10-NEXT: G_BRCOND [[XOR]](s1), %bb.5
 ; GFX10-NEXT: G_BR %bb.3
 ; GFX10-NEXT: {{ $}}
@@ -329,20 +339,24 @@ body: |
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
 ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[COPY9]], [[C8]]
-; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
+; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1)
+; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32)
 ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
 ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C9]]
-; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32)
-; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1)
+; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C9]]
+; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32)
+; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
+; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
+; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1)
 ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX10-NEXT: G_BR %bb.6
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: bb.6:
 ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
 ; GFX10-NEXT: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
 ; GFX10-NEXT: [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
-; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR1]](s1), [[C11]], [[C10]]
+; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[S_OR_B32_]](s1), [[C11]], [[C10]]
 ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
 ; GFX10-NEXT: SI_RETURN
 bb.0:
