Skip to content

Commit 92a4d89

Browse files
committed
[SelectionDAG] Use unaligned store/load to move AVX registers onto stack for insertelement
Prior to this patch, SelectionDAG generated aligned moves onto the stack for AVX registers when the function was marked as a no-realign-stack function. This led to a mismatch between the actual alignment of the stack slot and the alignment assumed by the generated instructions. This patch fixes the issue. A similar issue was reported for `extractelement`, which was fixed in #a6614ec5b7c1dbfc4b847884c5de780cf75e8e9c
1 parent d2a26a7 commit 92a4d89

File tree

2 files changed

+41
-18
lines changed

2 files changed

+41
-18
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -398,22 +398,24 @@ SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec,
398398
EVT EltVT = VT.getVectorElementType();
399399
SDValue StackPtr = DAG.CreateStackTemporary(VT);
400400

401-
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
401+
MachineMemOperand *AlignedMMO = getStackAlignedMMO(
402+
StackPtr, DAG.getMachineFunction(), EltVT.isScalableVector());
402403

403404
// Store the vector.
404-
SDValue Ch = DAG.getStore(
405-
DAG.getEntryNode(), dl, Tmp1, StackPtr,
406-
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
405+
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, AlignedMMO);
406+
407407

408408
SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Tmp3);
409409

410410
// Store the scalar value.
411-
Ch = DAG.getTruncStore(
412-
Ch, dl, Tmp2, StackPtr2,
413-
MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT);
414-
// Load the updated vector.
415-
return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(
416-
DAG.getMachineFunction(), SPFI));
411+
Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, EltVT, AlignedMMO);
412+
413+
Align ElementAlignment = std::min(cast<StoreSDNode>(Ch)->getAlign(),
414+
DAG.getDataLayout().getPrefTypeAlign(
415+
VT.getTypeForEVT(*DAG.getContext())));
416+
417+
return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo(),
418+
ElementAlignment);
417419
}
418420

419421
SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
@@ -1489,24 +1491,27 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
14891491
EVT VecVT = Vec.getValueType();
14901492
EVT SubVecVT = Part.getValueType();
14911493
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
1492-
int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
1493-
MachinePointerInfo PtrInfo =
1494-
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
1494+
MachineMemOperand *AlignedMMO = getStackAlignedMMO(
1495+
StackPtr, DAG.getMachineFunction(), VecVT.isScalableVector());
14951496

14961497
// First store the whole vector.
1497-
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
1498+
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, AlignedMMO);
14981499

14991500
// Then store the inserted part.
15001501
SDValue SubStackPtr =
15011502
TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, Idx);
15021503

15031504
// Store the subvector.
1504-
Ch = DAG.getStore(
1505-
Ch, dl, Part, SubStackPtr,
1506-
MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
1505+
Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, AlignedMMO);
1506+
1507+
Align ElementAlignment =
1508+
std::min(cast<StoreSDNode>(Ch)->getAlign(),
1509+
DAG.getDataLayout().getPrefTypeAlign(
1510+
Op.getValueType().getTypeForEVT(*DAG.getContext())));
15071511

15081512
// Finally, load the updated vector.
1509-
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo);
1513+
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, MachinePointerInfo(),
1514+
ElementAlignment);
15101515
}
15111516

15121517
SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
3+
4+
define <8 x i32> @foo(<8 x i32> %arg1, i32 %n) #0 {
5+
; CHECK-LABEL: foo:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
8+
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
9+
; CHECK-NEXT: andl $7, %edi
10+
; CHECK-NEXT: movl $42, -40(%rsp,%rdi,4)
11+
; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
12+
; CHECK-NEXT: retq
13+
entry:
14+
%a = insertelement <8 x i32> %arg1, i32 42, i32 %n
15+
ret <8 x i32> %a
16+
}
17+
18+
attributes #0 = { "no-realign-stack" "target-cpu"="haswell" }

0 commit comments

Comments
 (0)