Skip to content

Commit 25e5974

Browse files
committed
[SelectionDAG] Make (a & x) | (~a & y) -> (a & (x ^ y)) ^ y available for all targets
Squashed follow-up fixups: update affected tests; fix formatting; pre-commit a SystemZ test; fix SystemZ hasAndNot; fix an unrelated TableGen test; move hasAndNot; update llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td
1 parent f615a5c commit 25e5974

18 files changed

+993
-1154
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8110,6 +8110,59 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
81108110
return SDValue();
81118111
}
81128112

8113+
/// Try to fold one arrangement of the masked-merge pattern
/// (~NotOp & AndR0) | (NotOp & AndR1) into ((AndR1 ^ AndR0) & NotOp) ^ AndR0.
/// AndL0 must be the bitwise-not operand; AndL1/AndR1 are the operands of the
/// other AND (swapped here if needed so AndL1 matches the un-negated value).
/// Returns the folded value, or an empty SDValue if the pattern doesn't match.
static SDValue foldMaskedMergeImpl(SDValue AndL0, SDValue AndR0, SDValue AndL1,
                                   SDValue AndR1, const SDLoc &DL,
                                   SelectionDAG &DAG) {
  // AndL0 must be (xor X, -1) (undef elts allowed) and must die in this fold.
  if (!isBitwiseNot(AndL0, true) || !AndL0->hasOneUse())
    return SDValue();
  SDValue NotOp = AndL0->getOperand(0);
  // Commute the second AND if necessary so that AndL1 is the mask value.
  if (NotOp == AndR1)
    std::swap(AndR1, AndL1);
  if (NotOp != AndL1)
    return SDValue();

  // (~NotOp & AndR0) | (NotOp & AndR1)
  //   --> ((AndR1 ^ AndR0) & NotOp) ^ AndR0
  // AndR0 appears twice in the replacement while it appeared only once in the
  // original expression; freeze it so that both uses observe the same value
  // when it is undef/poison (otherwise the two uses could resolve to
  // different values and the fold would be unsound).
  EVT VT = AndL1->getValueType(0);
  SDValue FrozenAndR0 = DAG.getFreeze(AndR0);
  SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, AndR1, FrozenAndR0);
  SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp);
  return DAG.getNode(ISD::XOR, DL, VT, And, FrozenAndR0);
}
8130+
8131+
/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
8132+
/// equivalent `((x ^ y) & m) ^ y)` pattern.
8133+
/// This is typically a better representation for targets without a fused
8134+
/// "and-not" operation.
8135+
static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
8136+
const TargetLowering &TLI, const SDLoc &DL) {
8137+
// Note that masked-merge variants using XOR or ADD expressions are
8138+
// normalized to OR by InstCombine so we only check for OR.
8139+
assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
8140+
SDValue N0 = Node->getOperand(0);
8141+
if (N0->getOpcode() != ISD::AND || !N0->hasOneUse())
8142+
return SDValue();
8143+
SDValue N1 = Node->getOperand(1);
8144+
if (N1->getOpcode() != ISD::AND || !N1->hasOneUse())
8145+
return SDValue();
8146+
8147+
// If the target supports and-not, don't fold this.
8148+
if (TLI.hasAndNot(SDValue(Node, 0)))
8149+
return SDValue();
8150+
8151+
SDValue N00 = N0->getOperand(0);
8152+
SDValue N01 = N0->getOperand(1);
8153+
SDValue N10 = N1->getOperand(0);
8154+
SDValue N11 = N1->getOperand(1);
8155+
if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG))
8156+
return Result;
8157+
if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG))
8158+
return Result;
8159+
if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG))
8160+
return Result;
8161+
if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG))
8162+
return Result;
8163+
return SDValue();
8164+
}
8165+
81138166
SDValue DAGCombiner::visitOR(SDNode *N) {
81148167
SDValue N0 = N->getOperand(0);
81158168
SDValue N1 = N->getOperand(1);
@@ -8288,6 +8341,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
82888341
if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
82898342
return R;
82908343

8344+
if (VT.isScalarInteger() && VT != MVT::i1)
8345+
if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
8346+
return R;
8347+
82918348
return SDValue();
82928349
}
82938350

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1296,6 +1296,20 @@ bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
12961296
return true;
12971297
}
12981298

1299+
// Report whether SystemZ has a native and-with-complement instruction for
// values of Y's type, so generic combines keep the (x & ~y) form.
bool SystemZTargetLowering::hasAndNot(SDValue Y) const {
  const EVT VT = Y.getValueType();

  // i32/i64 values held in GPRs can use NC(G)RK, which requires the
  // miscellaneous-extensions-3 facility.
  const bool InGPR = VT == MVT::i32 || VT == MVT::i64;
  if (InGPR)
    return Subtarget.hasMiscellaneousExtensions3();

  // Vector types, and i128 which is held in a VR, can use VNC when the
  // vector facility is present.
  const bool InVR = VT.isVector() || VT == MVT::i128;
  if (InVR)
    return Subtarget.hasVector();

  // No and-not form exists for any other type.
  return false;
}
1312+
12991313
// Information about the addressing mode for a memory access.
13001314
struct AddressingMode {
13011315
// True if a long displacement is supported.

llvm/lib/Target/SystemZ/SystemZISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,7 @@ class SystemZTargetLowering : public TargetLowering {
671671
}
672672

673673
unsigned getStackProbeSize(const MachineFunction &MF) const;
674+
bool hasAndNot(SDValue Y) const override;
674675

675676
private:
676677
const SystemZSubtarget &Subtarget;

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 0 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -52089,59 +52089,6 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
5208952089
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
5209052090
}
5209152091

52092-
static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R,
52093-
SDValue And1_L, SDValue And1_R,
52094-
const SDLoc &DL, SelectionDAG &DAG) {
52095-
if (!isBitwiseNot(And0_L, true) || !And0_L->hasOneUse())
52096-
return SDValue();
52097-
SDValue NotOp = And0_L->getOperand(0);
52098-
if (NotOp == And1_R)
52099-
std::swap(And1_R, And1_L);
52100-
if (NotOp != And1_L)
52101-
return SDValue();
52102-
52103-
// (~(NotOp) & And0_R) | (NotOp & And1_R)
52104-
// --> ((And0_R ^ And1_R) & NotOp) ^ And1_R
52105-
EVT VT = And1_L->getValueType(0);
52106-
SDValue Freeze_And0_R = DAG.getNode(ISD::FREEZE, SDLoc(), VT, And0_R);
52107-
SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, And1_R, Freeze_And0_R);
52108-
SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp);
52109-
SDValue Xor1 = DAG.getNode(ISD::XOR, DL, VT, And, Freeze_And0_R);
52110-
return Xor1;
52111-
}
52112-
52113-
/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
52114-
/// equivalent `((x ^ y) & m) ^ y)` pattern.
52115-
/// This is typically a better representation for targets without a fused
52116-
/// "and-not" operation. This function is intended to be called from a
52117-
/// `TargetLowering::PerformDAGCombine` callback on `ISD::OR` nodes.
52118-
static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG) {
52119-
// Note that masked-merge variants using XOR or ADD expressions are
52120-
// normalized to OR by InstCombine so we only check for OR.
52121-
assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
52122-
SDValue N0 = Node->getOperand(0);
52123-
if (N0->getOpcode() != ISD::AND || !N0->hasOneUse())
52124-
return SDValue();
52125-
SDValue N1 = Node->getOperand(1);
52126-
if (N1->getOpcode() != ISD::AND || !N1->hasOneUse())
52127-
return SDValue();
52128-
52129-
SDLoc DL(Node);
52130-
SDValue N00 = N0->getOperand(0);
52131-
SDValue N01 = N0->getOperand(1);
52132-
SDValue N10 = N1->getOperand(0);
52133-
SDValue N11 = N1->getOperand(1);
52134-
if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG))
52135-
return Result;
52136-
if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG))
52137-
return Result;
52138-
if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG))
52139-
return Result;
52140-
if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG))
52141-
return Result;
52142-
return SDValue();
52143-
}
52144-
5214552092
/// If this is an add or subtract where one operand is produced by a cmp+setcc,
5214652093
/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
5214752094
/// with CMP+{ADC, SBB}.
@@ -52545,11 +52492,6 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
5254552492
}
5254652493
}
5254752494

52548-
// We should fold "masked merge" patterns when `andn` is not available.
52549-
if (!Subtarget.hasBMI() && VT.isScalarInteger() && VT != MVT::i1)
52550-
if (SDValue R = foldMaskedMerge(N, DAG))
52551-
return R;
52552-
5255352495
if (SDValue R = combineOrXorWithSETCC(N->getOpcode(), dl, VT, N0, N1, DAG))
5255452496
return R;
5255552497

llvm/test/CodeGen/AMDGPU/bfi_int.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@ define amdgpu_kernel void @s_bfi_def_i32(ptr addrspace(1) %out, i32 %x, i32 %y,
1616
; GFX7-NEXT: s_mov_b32 s7, 0xf000
1717
; GFX7-NEXT: s_mov_b32 s6, -1
1818
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
19-
; GFX7-NEXT: s_andn2_b32 s2, s2, s0
19+
; GFX7-NEXT: s_xor_b32 s1, s1, s2
2020
; GFX7-NEXT: s_and_b32 s0, s1, s0
21-
; GFX7-NEXT: s_or_b32 s0, s2, s0
21+
; GFX7-NEXT: s_xor_b32 s0, s0, s2
2222
; GFX7-NEXT: v_mov_b32_e32 v0, s0
2323
; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0
2424
; GFX7-NEXT: s_endpgm
@@ -28,9 +28,9 @@ define amdgpu_kernel void @s_bfi_def_i32(ptr addrspace(1) %out, i32 %x, i32 %y,
2828
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
2929
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
3030
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
31-
; GFX8-NEXT: s_andn2_b32 s2, s2, s0
31+
; GFX8-NEXT: s_xor_b32 s1, s1, s2
3232
; GFX8-NEXT: s_and_b32 s0, s1, s0
33-
; GFX8-NEXT: s_or_b32 s0, s2, s0
33+
; GFX8-NEXT: s_xor_b32 s0, s0, s2
3434
; GFX8-NEXT: v_mov_b32_e32 v0, s4
3535
; GFX8-NEXT: v_mov_b32_e32 v1, s5
3636
; GFX8-NEXT: v_mov_b32_e32 v2, s0
@@ -44,9 +44,9 @@ define amdgpu_kernel void @s_bfi_def_i32(ptr addrspace(1) %out, i32 %x, i32 %y,
4444
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
4545
; GFX10-NEXT: v_mov_b32_e32 v0, 0
4646
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
47-
; GFX10-NEXT: s_andn2_b32 s2, s2, s0
47+
; GFX10-NEXT: s_xor_b32 s1, s1, s2
4848
; GFX10-NEXT: s_and_b32 s0, s1, s0
49-
; GFX10-NEXT: s_or_b32 s0, s2, s0
49+
; GFX10-NEXT: s_xor_b32 s0, s0, s2
5050
; GFX10-NEXT: v_mov_b32_e32 v1, s0
5151
; GFX10-NEXT: global_store_dword v0, v1, s[4:5]
5252
; GFX10-NEXT: s_endpgm
@@ -1407,9 +1407,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
14071407
; GFX7-NEXT: s_mov_b32 s7, 0xf000
14081408
; GFX7-NEXT: s_mov_b32 s6, -1
14091409
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1410-
; GFX7-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
1411-
; GFX7-NEXT: s_andn2_b64 s[0:1], s[4:5], s[0:1]
1412-
; GFX7-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1410+
; GFX7-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
1411+
; GFX7-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
1412+
; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
14131413
; GFX7-NEXT: s_add_u32 s0, s0, 10
14141414
; GFX7-NEXT: s_addc_u32 s1, s1, 0
14151415
; GFX7-NEXT: v_mov_b32_e32 v0, s0
@@ -1422,9 +1422,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
14221422
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
14231423
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
14241424
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1425-
; GFX8-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
1426-
; GFX8-NEXT: s_andn2_b64 s[0:1], s[4:5], s[0:1]
1427-
; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1425+
; GFX8-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
1426+
; GFX8-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
1427+
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
14281428
; GFX8-NEXT: s_add_u32 s0, s0, 10
14291429
; GFX8-NEXT: s_addc_u32 s1, s1, 0
14301430
; GFX8-NEXT: v_mov_b32_e32 v0, s0
@@ -1438,9 +1438,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
14381438
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
14391439
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
14401440
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1441-
; GFX10-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
1442-
; GFX10-NEXT: s_andn2_b64 s[0:1], s[4:5], s[0:1]
1443-
; GFX10-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1441+
; GFX10-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
1442+
; GFX10-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
1443+
; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
14441444
; GFX10-NEXT: s_add_u32 s0, s0, 10
14451445
; GFX10-NEXT: s_addc_u32 s1, s1, 0
14461446
; GFX10-NEXT: v_mov_b32_e32 v0, s0

llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -289,16 +289,16 @@ entry:
289289
define amdgpu_kernel void @half4_inselt(ptr addrspace(1) %out, <4 x half> %vec, i32 %sel) {
290290
; GCN-LABEL: half4_inselt:
291291
; GCN: ; %bb.0: ; %entry
292-
; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
293292
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
293+
; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
294294
; GCN-NEXT: s_mov_b32 s4, 0x3c003c00
295295
; GCN-NEXT: s_mov_b32 s5, s4
296296
; GCN-NEXT: s_waitcnt lgkmcnt(0)
297+
; GCN-NEXT: s_xor_b64 s[4:5], s[2:3], s[4:5]
297298
; GCN-NEXT: s_lshl_b32 s6, s6, 4
298299
; GCN-NEXT: s_lshl_b64 s[6:7], 0xffff, s6
299-
; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[6:7]
300-
; GCN-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
301-
; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
300+
; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
301+
; GCN-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
302302
; GCN-NEXT: v_mov_b32_e32 v0, s0
303303
; GCN-NEXT: v_mov_b32_e32 v2, s2
304304
; GCN-NEXT: v_mov_b32_e32 v1, s1
@@ -317,10 +317,10 @@ define amdgpu_kernel void @half2_inselt(ptr addrspace(1) %out, <2 x half> %vec,
317317
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
318318
; GCN-NEXT: s_waitcnt lgkmcnt(0)
319319
; GCN-NEXT: s_lshl_b32 s3, s3, 4
320+
; GCN-NEXT: s_xor_b32 s4, s2, 0x3c003c00
320321
; GCN-NEXT: s_lshl_b32 s3, 0xffff, s3
321-
; GCN-NEXT: s_andn2_b32 s2, s2, s3
322-
; GCN-NEXT: s_and_b32 s3, s3, 0x3c003c00
323-
; GCN-NEXT: s_or_b32 s2, s3, s2
322+
; GCN-NEXT: s_and_b32 s3, s4, s3
323+
; GCN-NEXT: s_xor_b32 s2, s3, s2
324324
; GCN-NEXT: v_mov_b32_e32 v0, s0
325325
; GCN-NEXT: v_mov_b32_e32 v1, s1
326326
; GCN-NEXT: v_mov_b32_e32 v2, s2
@@ -399,10 +399,10 @@ define amdgpu_kernel void @short2_inselt(ptr addrspace(1) %out, <2 x i16> %vec,
399399
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
400400
; GCN-NEXT: s_waitcnt lgkmcnt(0)
401401
; GCN-NEXT: s_lshl_b32 s3, s3, 4
402+
; GCN-NEXT: s_xor_b32 s4, s2, 0x10001
402403
; GCN-NEXT: s_lshl_b32 s3, 0xffff, s3
403-
; GCN-NEXT: s_andn2_b32 s2, s2, s3
404-
; GCN-NEXT: s_and_b32 s3, s3, 0x10001
405-
; GCN-NEXT: s_or_b32 s2, s3, s2
404+
; GCN-NEXT: s_and_b32 s3, s4, s3
405+
; GCN-NEXT: s_xor_b32 s2, s3, s2
406406
; GCN-NEXT: v_mov_b32_e32 v0, s0
407407
; GCN-NEXT: v_mov_b32_e32 v1, s1
408408
; GCN-NEXT: v_mov_b32_e32 v2, s2
@@ -417,16 +417,16 @@ entry:
417417
define amdgpu_kernel void @short4_inselt(ptr addrspace(1) %out, <4 x i16> %vec, i32 %sel) {
418418
; GCN-LABEL: short4_inselt:
419419
; GCN: ; %bb.0: ; %entry
420-
; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
421420
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
421+
; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
422422
; GCN-NEXT: s_mov_b32 s4, 0x10001
423423
; GCN-NEXT: s_mov_b32 s5, s4
424424
; GCN-NEXT: s_waitcnt lgkmcnt(0)
425+
; GCN-NEXT: s_xor_b64 s[4:5], s[2:3], s[4:5]
425426
; GCN-NEXT: s_lshl_b32 s6, s6, 4
426427
; GCN-NEXT: s_lshl_b64 s[6:7], 0xffff, s6
427-
; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[6:7]
428-
; GCN-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
429-
; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
428+
; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
429+
; GCN-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
430430
; GCN-NEXT: v_mov_b32_e32 v0, s0
431431
; GCN-NEXT: v_mov_b32_e32 v2, s2
432432
; GCN-NEXT: v_mov_b32_e32 v1, s1
@@ -442,15 +442,15 @@ entry:
442442
define amdgpu_kernel void @byte8_inselt(ptr addrspace(1) %out, <8 x i8> %vec, i32 %sel) {
443443
; GCN-LABEL: byte8_inselt:
444444
; GCN: ; %bb.0: ; %entry
445-
; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
446445
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
446+
; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
447447
; GCN-NEXT: s_waitcnt lgkmcnt(0)
448-
; GCN-NEXT: s_lshl_b32 s4, s6, 3
449-
; GCN-NEXT: s_lshl_b64 s[4:5], 0xff, s4
450-
; GCN-NEXT: s_and_b32 s7, s5, 0x1010101
451-
; GCN-NEXT: s_and_b32 s6, s4, 0x1010101
452-
; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
453-
; GCN-NEXT: s_or_b64 s[2:3], s[6:7], s[2:3]
448+
; GCN-NEXT: s_xor_b32 s5, s3, 0x1010101
449+
; GCN-NEXT: s_lshl_b32 s6, s6, 3
450+
; GCN-NEXT: s_xor_b32 s4, s2, 0x1010101
451+
; GCN-NEXT: s_lshl_b64 s[6:7], 0xff, s6
452+
; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
453+
; GCN-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
454454
; GCN-NEXT: v_mov_b32_e32 v0, s0
455455
; GCN-NEXT: v_mov_b32_e32 v2, s2
456456
; GCN-NEXT: v_mov_b32_e32 v1, s1

0 commit comments

Comments
 (0)