Skip to content

Commit 54a99c8

Browse files
author
Manish Kausik H
committed
[SelectionDAG] Use unaligned store to legalize EXTRACT_VECTOR_ELT type
This patch ports the commit a6614ec to SelectionDAG TypeLegalization. Fixes #98044
1 parent 5c2bdc5 commit 54a99c8

File tree

3 files changed

+42
-6
lines changed

3 files changed

+42
-6
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -261,6 +261,7 @@ class SelectionDAGLegalize {
261261

262262
// Helper function that generates an MMO that considers the alignment of the
263263
// stack, and the size of the stack object
264+
// Note: A copy of this function exists in LegalizeVectorTypes.cpp
264265
static MachineMemOperand *getStackAlignedMMO(SDValue StackPtr,
265266
MachineFunction &MF,
266267
bool isObjectScalable) {

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 23 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
#include "llvm/Analysis/MemoryLocation.h"
2525
#include "llvm/Analysis/VectorUtils.h"
2626
#include "llvm/CodeGen/ISDOpcodes.h"
27+
#include "llvm/CodeGen/MachineFrameInfo.h"
2728
#include "llvm/IR/DataLayout.h"
2829
#include "llvm/Support/ErrorHandling.h"
2930
#include "llvm/Support/TypeSize.h"
@@ -221,6 +222,22 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
221222
SetScalarizedVector(SDValue(N, ResNo), R);
222223
}
223224

225+
// Helper function that generates a store MMO for the stack object that
226+
// StackPtr refers to, using that object's alignment and size from the frame info
227+
// Note: A copy of this function exists in LegalizeDAG.cpp
228+
static MachineMemOperand *getStackAlignedMMO(SDValue StackPtr,
229+
MachineFunction &MF,
230+
bool isObjectScalable) {
231+
auto &MFI = MF.getFrameInfo();
232+
int FI = cast<FrameIndexSDNode>(StackPtr)->getIndex(); // StackPtr is expected to be a frame-index node
233+
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
234+
LocationSize ObjectSize = isObjectScalable
235+
? LocationSize::beforeOrAfterPointer() // scalable object: do not claim a precise size
236+
: LocationSize::precise(MFI.getObjectSize(FI)); // otherwise: the frame object's recorded size
237+
return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, // store-only operand
238+
ObjectSize, MFI.getObjectAlign(FI)); // alignment comes from the frame object, not the caller
239+
}
240+
224241
SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
225242
SDValue LHS = GetScalarizedVector(N->getOperand(0));
226243
SDValue RHS = GetScalarizedVector(N->getOperand(1));
@@ -3531,14 +3548,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
35313548
// Store the vector to the stack.
35323549
// In cases where the vector is illegal it will be broken down into parts
35333550
// and stored in parts - we should use the alignment for the smallest part.
3534-
Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
3551+
Align SmallestAlign =
3552+
std::min(DAG.getSubtarget().getFrameLowering()->getStackAlign(),
3553+
DAG.getReducedAlign(VecVT, /*UseABI=*/false));
35353554
SDValue StackPtr =
35363555
DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
3537-
auto &MF = DAG.getMachineFunction();
3538-
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
3539-
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
3540-
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
3541-
SmallestAlign);
3556+
MachineMemOperand *StoreMMO = getStackAlignedMMO(
3557+
StackPtr, DAG.getMachineFunction(), VecVT.isScalableVector());
3558+
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, StoreMMO);
35423559

35433560
// Load back the required element.
35443561
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);

llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,4 +17,22 @@ entry:
1717
ret i32 %b
1818
}
1919

20+
define i32 @foo2(i32 %arg1) #1 { ; attribute group #1: "no-realign-stack", target-cpu "skylake"
21+
; CHECK-LABEL: foo2:
22+
; CHECK: # %bb.0: # %entry
23+
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
24+
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
25+
; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
26+
; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
27+
; CHECK-NEXT: andl $31, %edi
28+
; CHECK-NEXT: movzwl -72(%rsp,%rdi,2), %eax
29+
; CHECK-NEXT: vzeroupper
30+
; CHECK-NEXT: retq
31+
entry:
32+
%a = extractelement <32 x i16> zeroinitializer, i32 %arg1 ; runtime index: the expected assembly above stores the vector to the stack in two halves (vmovups) and loads one i16 back
33+
%b = zext i16 %a to i32
34+
ret i32 %b
35+
}
36+
2037
attributes #0 = { "no-realign-stack" "target-cpu"="skylake-avx512" }
38+
attributes #1 = { "no-realign-stack" "target-cpu"="skylake" }

0 commit comments

Comments
 (0)