Commit 5e06268

AMDGPU/GlobalISel: Temporal divergence lowering i1
A use of an i1 outside of the cycle, whether uniform or divergent, is a lane mask (in an sgpr) that holds, for each lane, the i1 value from the iteration in which that lane exited the cycle. Create a phi that merges the lane mask across all iterations.
1 parent a5c340d commit 5e06268

9 files changed, +619 −434 lines
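To illustrate the semantics from the commit message, here is a minimal sketch (hypothetical values, wave size cut down to four lanes): each iteration overwrites only the bits of lanes that are still active, so once a lane exits the cycle its bit stays frozen at the value from its last iteration.

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Exec = 0xF;   // four lanes active at cycle entry
  uint32_t Merged = 0;   // lane mask carrying the i1 across iterations
  for (unsigned Iter = 0; Exec != 0; ++Iter) {
    uint32_t Val = (Iter % 2) ? 0xF : 0x0; // hypothetical i1, per iteration
    // Inactive lanes keep the bit from the iteration in which they exited;
    // active lanes take this iteration's value.
    Merged = (Merged & ~Exec) | (Val & Exec);
    Exec &= ~(1u << Iter); // hypothetical exit: lane 'Iter' leaves this iteration
  }
  // Lanes 1 and 3 exited on odd iterations, so only their bits are set.
  printf("merged = 0x%x\n", Merged); // prints: merged = 0xa
}
```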

llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp

Lines changed: 55 additions & 0 deletions
@@ -80,6 +80,7 @@ class DivergenceLoweringHelper : public PhiLoweringHelper {
   void constrainAsLaneMask(Incoming &In) override;
 
   bool lowerTemporalDivergence();
+  bool lowerTemporalDivergenceI1();
 };
 
 DivergenceLoweringHelper::DivergenceLoweringHelper(
@@ -219,6 +220,54 @@ bool DivergenceLoweringHelper::lowerTemporalDivergence() {
   return false;
 }
 
+bool DivergenceLoweringHelper::lowerTemporalDivergenceI1() {
+  MachineRegisterInfo::VRegAttrs BoolS1 = {ST->getBoolRC(), LLT::scalar(1)};
+  initializeLaneMaskRegisterAttributes(BoolS1);
+
+  for (auto [Inst, UseInst, Cycle] : MUI->getTemporalDivergenceList()) {
+    Register Reg = Inst->getOperand(0).getReg();
+    if (MRI->getType(Reg) != LLT::scalar(1))
+      continue;
+
+    Register MergedMask = MRI->createVirtualRegister(BoolS1);
+    Register PrevIterMask = MRI->createVirtualRegister(BoolS1);
+
+    MachineBasicBlock *CycleHeaderMBB = Cycle->getHeader();
+    SmallVector<MachineBasicBlock *, 1> ExitingBlocks;
+    Cycle->getExitingBlocks(ExitingBlocks);
+    assert(ExitingBlocks.size() == 1);
+    MachineBasicBlock *CycleExitingMBB = ExitingBlocks[0];
+
+    B.setInsertPt(*CycleHeaderMBB, CycleHeaderMBB->begin());
+    auto CrossIterPHI = B.buildInstr(AMDGPU::PHI).addDef(PrevIterMask);
+
+    // We only care about the cycle iteration path - merge Reg with the
+    // previous iteration. For other incoming values use an implicit def.
+    // Predecessors should be CyclePredecessor and CycleExitingMBB.
+    // In older versions of irreducible control flow lowering there could be
+    // cases with more predecessors. To keep this lowering as generic as
+    // possible, also handle those cases.
+    for (auto MBB : CycleHeaderMBB->predecessors()) {
+      if (MBB == CycleExitingMBB) {
+        CrossIterPHI.addReg(MergedMask);
+      } else {
+        B.setInsertPt(*MBB, MBB->getFirstTerminator());
+        auto ImplDef = B.buildInstr(AMDGPU::IMPLICIT_DEF, {BoolS1}, {});
+        CrossIterPHI.addReg(ImplDef.getReg(0));
+      }
+      CrossIterPHI.addMBB(MBB);
+    }
+
+    MachineBasicBlock *MBB = Inst->getParent();
+    buildMergeLaneMasks(*MBB, MBB->getFirstTerminator(), {}, MergedMask,
+                        PrevIterMask, Reg);
+
+    replaceUsesOfRegInInstWith(Reg, UseInst, MergedMask);
+  }
+
+  return false;
+}
+
 } // End anonymous namespace.
 
 INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
@@ -258,6 +307,12 @@ bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
 
   // Non-i1 temporal divergence lowering.
   Changed |= Helper.lowerTemporalDivergence();
+  // This covers both uniform and divergent i1s. Lane masks are in sgprs and
+  // need to be updated in each iteration.
+  Changed |= Helper.lowerTemporalDivergenceI1();
+  // Temporal divergence lowering of a divergent i1 phi used outside the cycle
+  // could also be handled by lowerPhis, but we do it in lowerTemporalDivergenceI1
+  // since in some cases lowerPhis does unnecessary lane mask merging.
   Changed |= Helper.lowerPhis();
   return Changed;
 }
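The merge built at the bottom of the cycle is the key step. Below is a minimal model of the wave32 lane-mask update that buildMergeLaneMasks emits here; the helper name is illustrative, and the mapping to S_ANDN2_B32/S_AND_B32/S_OR_B32 is the sequence visible in the test updates that follow.

```cpp
#include <cstdint>

// Sketch, assuming wave32: keep the bits of lanes that already exited the
// cycle from the previous iteration, take the bits of still-active lanes
// from this iteration's value of the i1.
uint32_t mergeLaneMasks(uint32_t PrevIterMask, uint32_t Reg, uint32_t ExecLo) {
  uint32_t Kept = PrevIterMask & ~ExecLo; // s_andn2_b32 s7, s7, exec_lo
  uint32_t Cur = ExecLo & Reg;            // s_and_b32 s8, exec_lo, s8
  return Kept | Cur;                      // s_or_b32 s7, s7, s8 -> MergedMask
}
```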

llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll

Lines changed: 20 additions & 10 deletions
@@ -104,20 +104,25 @@ define void @divergent_i1_phi_used_inside_loop(float %val, ptr %addr) {
 ; GFX10-NEXT: s_mov_b32 s4, 0
 ; GFX10-NEXT: s_mov_b32 s5, 1
 ; GFX10-NEXT: s_mov_b32 s6, 0
+; GFX10-NEXT: ; implicit-def: $sgpr7
 ; GFX10-NEXT: .LBB2_1: ; %loop
 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
 ; GFX10-NEXT: v_cvt_f32_u32_e32 v3, s6
 ; GFX10-NEXT: s_xor_b32 s5, s5, 1
-; GFX10-NEXT: s_add_i32 s6, s6, 1
+; GFX10-NEXT: s_and_b32 s8, s5, 1
+; GFX10-NEXT: s_cmp_lg_u32 s8, 0
 ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v0
+; GFX10-NEXT: s_cselect_b32 s8, exec_lo, 0
+; GFX10-NEXT: s_add_i32 s6, s6, 1
 ; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4
+; GFX10-NEXT: s_andn2_b32 s7, s7, exec_lo
+; GFX10-NEXT: s_and_b32 s8, exec_lo, s8
+; GFX10-NEXT: s_or_b32 s7, s7, s8
 ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
 ; GFX10-NEXT: s_cbranch_execnz .LBB2_1
 ; GFX10-NEXT: ; %bb.2: ; %exit
 ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
-; GFX10-NEXT: s_cmp_lg_u32 s5, 0
-; GFX10-NEXT: s_cselect_b32 s4, exec_lo, 0
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s7
 ; GFX10-NEXT: flat_store_dword v[1:2], v0
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -147,37 +152,42 @@ define void @divergent_i1_phi_used_inside_loop_bigger_loop_body(float %val, floa
 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x3e8
 ; GFX10-NEXT: s_mov_b32 s5, 0
 ; GFX10-NEXT: s_mov_b32 s6, 0
+; GFX10-NEXT: ; implicit-def: $sgpr7
 ; GFX10-NEXT: s_branch .LBB3_2
 ; GFX10-NEXT: .LBB3_1: ; %loop_body
 ; GFX10-NEXT: ; in Loop: Header=BB3_2 Depth=1
 ; GFX10-NEXT: v_cvt_f32_u32_e32 v8, s6
-; GFX10-NEXT: s_xor_b32 s4, s4, exec_lo
+; GFX10-NEXT: s_mov_b32 s8, exec_lo
 ; GFX10-NEXT: s_add_i32 s6, s6, 1
+; GFX10-NEXT: s_xor_b32 s4, s4, s8
 ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v8, v0
 ; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5
+; GFX10-NEXT: s_andn2_b32 s7, s7, exec_lo
+; GFX10-NEXT: s_and_b32 s8, exec_lo, s4
+; GFX10-NEXT: s_or_b32 s7, s7, s8
 ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5
 ; GFX10-NEXT: s_cbranch_execz .LBB3_6
 ; GFX10-NEXT: .LBB3_2: ; %loop_start
 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
 ; GFX10-NEXT: s_cmpk_le_i32 s6, 0x3e8
-; GFX10-NEXT: s_mov_b32 s7, 1
+; GFX10-NEXT: s_mov_b32 s8, 1
 ; GFX10-NEXT: s_cbranch_scc0 .LBB3_4
 ; GFX10-NEXT: ; %bb.3: ; %else
 ; GFX10-NEXT: ; in Loop: Header=BB3_2 Depth=1
-; GFX10-NEXT: s_mov_b32 s7, 0
+; GFX10-NEXT: s_mov_b32 s8, 0
 ; GFX10-NEXT: flat_store_dword v[6:7], v1
 ; GFX10-NEXT: .LBB3_4: ; %Flow
 ; GFX10-NEXT: ; in Loop: Header=BB3_2 Depth=1
-; GFX10-NEXT: s_xor_b32 s7, s7, 1
-; GFX10-NEXT: s_cmp_lg_u32 s7, 0
+; GFX10-NEXT: s_xor_b32 s8, s8, 1
+; GFX10-NEXT: s_cmp_lg_u32 s8, 0
 ; GFX10-NEXT: s_cbranch_scc1 .LBB3_1
 ; GFX10-NEXT: ; %bb.5: ; %if
 ; GFX10-NEXT: ; in Loop: Header=BB3_2 Depth=1
 ; GFX10-NEXT: flat_store_dword v[4:5], v1
 ; GFX10-NEXT: s_branch .LBB3_1
 ; GFX10-NEXT: .LBB3_6: ; %exit
 ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s7
 ; GFX10-NEXT: flat_store_dword v[2:3], v0
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
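As a sanity check on the updated divergent_i1_phi_used_inside_loop sequence above: s5 holds the i1 that flips every iteration, s7 accumulates it across iterations, and a lane stops participating once (float)counter > val holds for it. A sketch of those loop semantics, with made-up per-lane inputs and four lanes instead of 32:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  float Val[4] = {0.5f, 1.5f, 2.5f, 3.5f}; // hypothetical per-lane %val
  uint32_t Exec = 0xF;                     // exec_lo
  uint32_t S4 = 0;                         // accumulated exit mask
  uint32_t S7 = 0;                         // merged i1 (the implicit-def: $sgpr7)
  uint32_t B = 0xF;                        // the flipping i1, broadcast per lane
  for (unsigned Counter = 0; Exec != 0; ++Counter) {
    B ^= 0xF;                              // s_xor_b32 s5, s5, 1 (+ s_cselect broadcast)
    uint32_t Vcc = 0;                      // v_cmp_gt_f32 vcc_lo, v3, v0
    for (int L = 0; L < 4; ++L)
      if ((Exec >> L & 1) && (float)Counter > Val[L])
        Vcc |= 1u << L;
    S4 |= Vcc;                             // s_or_b32 s4, vcc_lo, s4
    S7 = (S7 & ~Exec) | (B & Exec);        // s_andn2/s_and/s_or merge into s7
    Exec &= ~S4;                           // s_andn2_b32 exec_lo, exec_lo, s4
  }
  // v_cndmask reads s7: each lane selects 1.0 iff its bit in s7 is set.
  printf("s7 = 0x%x\n", S7); // prints: s7 = 0x5 (lanes 0 and 2 exited with i1 set)
}
```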

llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir

Lines changed: 33 additions & 19 deletions
@@ -201,28 +201,35 @@ body: |
 ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
 ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: bb.1:
 ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
 ; GFX10-NEXT: {{ $}}
-; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
-; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
-; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C1]](s1), %bb.0, %11(s1), %bb.1
+; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %19(s1), %bb.1
+; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
+; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
+; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C1]](s1), %bb.0, %11(s1), %bb.1
+; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
 ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
-; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
-; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
+; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
+; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
+; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
 ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
 ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
-; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
+; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
+; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
+; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc
+; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
+; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
 ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX10-NEXT: G_BR %bb.2
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: bb.2:
 ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
 ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
 ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
-; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s1), [[C5]], [[C4]]
+; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[S_OR_B32_]](s1), [[C5]], [[C4]]
 ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
 ; GFX10-NEXT: SI_RETURN
 bb.0:
@@ -285,27 +292,30 @@ body: |
 ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
 ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C]]
 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
 ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1)
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: bb.1:
 ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX10-NEXT: {{ $}}
-; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.0, %37(s1), %bb.5
-; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C1]](s32), %bb.0
-; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %17(s32), %bb.5
+; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.0, %44(s1), %bb.5
+; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %36(s1), %bb.5
+; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C1]](s32), %bb.0
+; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %17(s32), %bb.5
 ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
 ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000
-; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI2]](s32), [[C2]]
+; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI3]](s32), [[C2]]
 ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
 ; GFX10-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
 ; GFX10-NEXT: G_BR %bb.2
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: bb.2:
 ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
 ; GFX10-NEXT: {{ $}}
-; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI %24(s1), %bb.4, [[C3]](s1), %bb.1
+; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI %24(s1), %bb.4, [[C3]](s1), %bb.1
 ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
-; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C4]]
+; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI4]], [[C4]]
 ; GFX10-NEXT: G_BRCOND [[XOR]](s1), %bb.5
 ; GFX10-NEXT: G_BR %bb.3
 ; GFX10-NEXT: {{ $}}
@@ -329,20 +339,24 @@ body: |
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
 ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[COPY9]], [[C8]]
-; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
+; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1)
+; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32)
 ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
 ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C9]]
-; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32)
-; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1)
+; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C9]]
+; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32)
+; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
+; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
+; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1)
 ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX10-NEXT: G_BR %bb.6
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: bb.6:
 ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
 ; GFX10-NEXT: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
 ; GFX10-NEXT: [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
-; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR1]](s1), [[C11]], [[C10]]
+; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[S_OR_B32_]](s1), [[C11]], [[C10]]
 ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
 ; GFX10-NEXT: SI_RETURN
 bb.0:
