Skip to content

Commit a3e0033

Browse files
committed
[DAGCombiner] Freeze maybe poison operands when folding select to logic
Just like for regular IR, we need to treat SELECT as conditionally blocking poison. So (unless the condition itself is poison) the result is only poison if the selected true/false value is poison. Thus, when doing DAG combines that turn SELECT into arithmetic/logical operations (e.g. AND/OR), we need to make sure that the new operations aren't more poisonous. One way to do that is to use FREEZE to make sure the operands aren't poison. This patch aims at fixing the kind of miscompiles reported in #84653 and #85190. The solution is to make sure that we insert FREEZE, if needed to make the fold sound, when using the foldBoolSelectToLogic and foldVSelectToSignBitSplatMask DAG combines. This may result in some (hopefully minor) regressions, since we lack some ways to fold away the freeze (or due to isGuaranteedNotToBePoison being too pessimistic). The focus of this patch is just to avoid miscompiles, but I think some of the regressions can be avoided by general improvements regarding poison/freeze handling in SelectionDAG.
1 parent 67aec0c commit a3e0033

19 files changed

+321
-300
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11473,28 +11473,28 @@ static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
1147311473
if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
1147411474
return SDValue();
1147511475

11476-
// select Cond, Cond, F --> or Cond, F
11477-
// select Cond, 1, F --> or Cond, F
11476+
// select Cond, Cond, F --> or Cond, freeze(F)
11477+
// select Cond, 1, F --> or Cond, freeze(F)
1147811478
if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11479-
return matcher.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
11479+
return matcher.getNode(ISD::OR, SDLoc(N), VT, Cond, DAG.getFreeze(F));
1148011480

1148111481
// select Cond, T, Cond --> and Cond, T
1148211482
// select Cond, T, 0 --> and Cond, T
1148311483
if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11484-
return matcher.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
11484+
return matcher.getNode(ISD::AND, SDLoc(N), VT, Cond, DAG.getFreeze(T));
1148511485

1148611486
// select Cond, T, 1 --> or (not Cond), T
1148711487
if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
1148811488
SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
1148911489
DAG.getAllOnesConstant(SDLoc(N), VT));
11490-
return matcher.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
11490+
return matcher.getNode(ISD::OR, SDLoc(N), VT, NotCond, DAG.getFreeze(T));
1149111491
}
1149211492

1149311493
// select Cond, 0, F --> and (not Cond), F
1149411494
if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
1149511495
SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
1149611496
DAG.getAllOnesConstant(SDLoc(N), VT));
11497-
return matcher.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
11497+
return matcher.getNode(ISD::AND, SDLoc(N), VT, NotCond, DAG.getFreeze(F));
1149811498
}
1149911499

1150011500
return SDValue();
@@ -11528,15 +11528,15 @@ static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
1152811528
SDLoc DL(N);
1152911529
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
1153011530
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11531-
return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
11531+
return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
1153211532
}
1153311533

1153411534
// (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
1153511535
if (isAllOnesOrAllOnesSplat(N1)) {
1153611536
SDLoc DL(N);
1153711537
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
1153811538
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11539-
return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
11539+
return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
1154011540
}
1154111541

1154211542
// If we have to invert the sign bit mask, only do that transform if the
@@ -11548,7 +11548,7 @@ static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
1154811548
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
1154911549
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
1155011550
SDValue Not = DAG.getNOT(DL, Sra, VT);
11551-
return DAG.getNode(ISD::AND, DL, VT, Not, N2);
11551+
return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
1155211552
}
1155311553

1155411554
// TODO: There's another pattern in this family, but it may require

llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,8 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
236236
; CHECK-NEXT: mov z7.d, z0.d
237237
; CHECK-NEXT: add x9, x9, x11
238238
; CHECK-NEXT: add x8, x8, x12
239-
; CHECK-NEXT: cmpne p2.d, p1/z, z2.d, #0
239+
; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0
240+
; CHECK-NEXT: and p2.b, p1/z, p1.b, p2.b
240241
; CHECK-NEXT: zip2 p1.d, p2.d, p2.d
241242
; CHECK-NEXT: zip1 p2.d, p2.d, p2.d
242243
; CHECK-NEXT: ld1d { z2.d }, p1/z, [x13, #1, mul vl]

llvm/test/CodeGen/AArch64/fast-isel-select.ll

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
2-
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
3-
; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefix=GISEL
1+
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,SISEL
2+
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,FISEL
3+
; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GISEL
44

55
; First test the different supported value types for select.
66
define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
@@ -295,22 +295,28 @@ define float @select_icmp_sle(i32 %x, i32 %y, float %a, float %b) {
295295
; Test peephole optimizations for select.
296296
define zeroext i1 @select_opt1(i1 zeroext %c, i1 zeroext %a) {
297297
; CHECK-LABEL: select_opt1
298-
; CHECK: orr {{w[0-9]+}}, w0, w1
298+
; SISEL: orr [[REG:w[0-9]+]], w0, w1
299+
; SISEL: and w0, [[REG]], #0x1
300+
; FISEL: orr {{w[0-9]+}}, w0, w1
299301
%1 = select i1 %c, i1 true, i1 %a
300302
ret i1 %1
301303
}
302304

303305
define zeroext i1 @select_opt2(i1 zeroext %c, i1 zeroext %a) {
304306
; CHECK-LABEL: select_opt2
305-
; CHECK: eor [[REG:w[0-9]+]], w0, #0x1
306-
; CHECK: orr {{w[0-9]+}}, [[REG]], w1
307+
; SISEL: orn [[REG:w[0-9]+]], w1, w0
308+
; SISEL: and w0, [[REG]], #0x1
309+
; FISEL: eor [[REG:w[0-9]+]], w0, #0x1
310+
; FISEL: orr {{w[0-9]+}}, [[REG]], w1
307311
%1 = select i1 %c, i1 %a, i1 true
308312
ret i1 %1
309313
}
310314

311315
define zeroext i1 @select_opt3(i1 zeroext %c, i1 zeroext %a) {
312316
; CHECK-LABEL: select_opt3
313-
; CHECK: bic {{w[0-9]+}}, w1, w0
317+
; SISEL: eor [[REG:w[0-9]+]], w0, #0x1
318+
; SISEL: and w0, [[REG]], w1
319+
; FISEL: bic {{w[0-9]+}}, w1, w0
314320
%1 = select i1 %c, i1 false, i1 %a
315321
ret i1 %1
316322
}

llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,8 +319,9 @@ define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %a) {
319319
define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
320320
; CHECK-LABEL: ctz_and_nxv16i1:
321321
; CHECK: // %bb.0:
322-
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b
323322
; CHECK-NEXT: ptrue p1.b
323+
; CHECK-NEXT: cmpne p2.b, p1/z, z0.b, z1.b
324+
; CHECK-NEXT: and p0.b, p0/z, p0.b, p2.b
324325
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
325326
; CHECK-NEXT: cntp x0, p0, p0.b
326327
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0

llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ define i64 @scalable_int_min_max(ptr %arg, ptr %arg1, <vscale x 2 x ptr> %i37, <
2424
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z4.s
2525
; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z3.s
2626
; CHECK-NEXT: add z0.d, z2.d, z1.d
27-
; CHECK-NEXT: bic p2.b, p1/z, p1.b, p2.b
27+
; CHECK-NEXT: not p2.b, p0/z, p2.b
28+
; CHECK-NEXT: and p2.b, p1/z, p1.b, p2.b
2829
; CHECK-NEXT: mov z0.d, p2/m, z2.d
2930
; CHECK-NEXT: sel z0.d, p1, z0.d, z2.d
3031
; CHECK-NEXT: uaddv d0, p0, z0.d

llvm/test/CodeGen/AMDGPU/div_i128.ll

Lines changed: 24 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -476,28 +476,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
476476
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
477477
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
478478
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
479+
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
480+
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
481+
; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
479482
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
480483
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
481484
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
482485
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
483-
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v9
484-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
485-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
486-
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
487-
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
488-
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6
489-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
490-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
486+
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
491487
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
492-
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
493-
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
494-
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
488+
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
489+
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
495490
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
496-
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
497-
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
491+
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
498492
; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15]
499-
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
500-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
493+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
501494
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15]
502495
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9]
503496
; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7
@@ -508,7 +501,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
508501
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
509502
; GFX9-O0-NEXT: s_mov_b32 s14, s13
510503
; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14
511-
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
512504
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
513505
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12
514506
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
@@ -1042,10 +1034,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
10421034
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
10431035
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
10441036
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1045-
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1046-
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1047-
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1048-
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1037+
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1038+
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1039+
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1040+
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
10491041
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
10501042
; GFX9-O0-NEXT: s_mov_b32 s5, s6
10511043
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
@@ -2747,28 +2739,21 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
27472739
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
27482740
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
27492741
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
2742+
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2743+
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
2744+
; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
27502745
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
27512746
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
27522747
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
27532748
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
2754-
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v9
2755-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
2756-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2757-
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
2758-
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2759-
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6
2760-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
2761-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2749+
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
27622750
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
2763-
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2764-
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
2765-
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
2751+
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2752+
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
27662753
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
2767-
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
2768-
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
2754+
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
27692755
; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15]
2770-
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
2771-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
2756+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
27722757
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15]
27732758
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9]
27742759
; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7
@@ -2779,7 +2764,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
27792764
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
27802765
; GFX9-O0-NEXT: s_mov_b32 s14, s13
27812766
; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14
2782-
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
27832767
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
27842768
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12
27852769
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
@@ -3313,10 +3297,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
33133297
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
33143298
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
33153299
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3316-
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3317-
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3318-
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3319-
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3300+
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3301+
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3302+
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3303+
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
33203304
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
33213305
; GFX9-O0-NEXT: s_mov_b32 s5, s6
33223306
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)

0 commit comments

Comments
 (0)