Skip to content

Commit 2f0989e

Browse files
committed
[SelectionDAG] Use unaligned store/load to move AVX registers onto stack for insertelement
Prior to this patch, SelectionDAG generated aligned moves onto the stack for AVX registers even when the function was marked as a no-realign-stack function. This led to a mismatch between the actual stack alignment and the alignment assumed by the generated instructions. This patch fixes the issue. A similar issue was reported for `extractelement`, which was fixed in #a6614ec5b7c1dbfc4b847884c5de780cf75e8e9c.
1 parent b56b3d7 commit 2f0989e

File tree

2 files changed

+53
-32
lines changed

2 files changed

+53
-32
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 35 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,19 @@ SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) {
363363
return Result;
364364
}
365365

366+
// Helper function that generates an MMO that considers the alignment of the
367+
// stack, and the size of the stack object
368+
static MachineMemOperand *getStackAlignedMMO(SDValue StackPtr,
369+
MachineFunction &MF,
370+
bool isObjectScalable) {
371+
auto &MFI = MF.getFrameInfo();
372+
int FI = cast<FrameIndexSDNode>(StackPtr)->getIndex();
373+
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
374+
uint64_t ObjectSize = isObjectScalable ? ~UINT64_C(0) : MFI.getObjectSize(FI);
375+
return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
376+
ObjectSize, MFI.getObjectAlign(FI));
377+
}
378+
366379
/// Some targets cannot handle a variable insertion index for the
367380
/// INSERT_VECTOR_ELT instruction. In this case, it
368381
/// is necessary to spill the vector being inserted into to memory, perform
@@ -384,23 +397,23 @@ SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec,
384397
EVT VT = Tmp1.getValueType();
385398
EVT EltVT = VT.getVectorElementType();
386399
SDValue StackPtr = DAG.CreateStackTemporary(VT);
387-
388-
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
400+
MachineMemOperand *AlignedMMO = getStackAlignedMMO(
401+
StackPtr, DAG.getMachineFunction(), EltVT.isScalableVector());
389402

390403
// Store the vector.
391-
SDValue Ch = DAG.getStore(
392-
DAG.getEntryNode(), dl, Tmp1, StackPtr,
393-
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
404+
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, AlignedMMO);
394405

395406
SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Tmp3);
396407

397408
// Store the scalar value.
398-
Ch = DAG.getTruncStore(
399-
Ch, dl, Tmp2, StackPtr2,
400-
MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT);
401-
// Load the updated vector.
402-
return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(
403-
DAG.getMachineFunction(), SPFI));
409+
Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, EltVT, AlignedMMO);
410+
411+
Align ElementAlignment = std::min(cast<StoreSDNode>(Ch)->getAlign(),
412+
DAG.getDataLayout().getPrefTypeAlign(
413+
VT.getTypeForEVT(*DAG.getContext())));
414+
415+
return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo(),
416+
ElementAlignment);
404417
}
405418

406419
SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
@@ -1378,19 +1391,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
13781391
}
13791392
}
13801393

1381-
// Helper function that generates an MMO that considers the alignment of the
1382-
// stack, and the size of the stack object
1383-
static MachineMemOperand *getStackAlignedMMO(SDValue StackPtr,
1384-
MachineFunction &MF,
1385-
bool isObjectScalable) {
1386-
auto &MFI = MF.getFrameInfo();
1387-
int FI = cast<FrameIndexSDNode>(StackPtr)->getIndex();
1388-
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
1389-
uint64_t ObjectSize = isObjectScalable ? ~UINT64_C(0) : MFI.getObjectSize(FI);
1390-
return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
1391-
ObjectSize, MFI.getObjectAlign(FI));
1392-
}
1393-
13941394
SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
13951395
SDValue Vec = Op.getOperand(0);
13961396
SDValue Idx = Op.getOperand(1);
@@ -1488,24 +1488,27 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
14881488
EVT VecVT = Vec.getValueType();
14891489
EVT SubVecVT = Part.getValueType();
14901490
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
1491-
int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
1492-
MachinePointerInfo PtrInfo =
1493-
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
1491+
MachineMemOperand *AlignedMMO = getStackAlignedMMO(
1492+
StackPtr, DAG.getMachineFunction(), VecVT.isScalableVector());
14941493

14951494
// First store the whole vector.
1496-
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
1495+
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, AlignedMMO);
14971496

14981497
// Then store the inserted part.
14991498
SDValue SubStackPtr =
15001499
TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, Idx);
15011500

15021501
// Store the subvector.
1503-
Ch = DAG.getStore(
1504-
Ch, dl, Part, SubStackPtr,
1505-
MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
1502+
Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, AlignedMMO);
1503+
1504+
Align ElementAlignment =
1505+
std::min(cast<StoreSDNode>(Ch)->getAlign(),
1506+
DAG.getDataLayout().getPrefTypeAlign(
1507+
Op.getValueType().getTypeForEVT(*DAG.getContext())));
15061508

15071509
// Finally, load the updated vector.
1508-
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo);
1510+
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, MachinePointerInfo(),
1511+
ElementAlignment);
15091512
}
15101513

15111514
SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
3+
4+
define <8 x i32> @foo(<8 x i32> %arg1, i32 %n) #0 {
5+
; CHECK-LABEL: foo:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
8+
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
9+
; CHECK-NEXT: andl $7, %edi
10+
; CHECK-NEXT: movl $42, -40(%rsp,%rdi,4)
11+
; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
12+
; CHECK-NEXT: retq
13+
entry:
14+
%a = insertelement <8 x i32> %arg1, i32 42, i32 %n
15+
ret <8 x i32> %a
16+
}
17+
18+
attributes #0 = { "no-realign-stack" "target-cpu"="haswell" }

0 commit comments

Comments
 (0)