Skip to content

Commit 9d7d34c

Browse files
committed
[X86][MS] Fix the alignment mismatch of vector variable arguments on Win32
On the callee side, vector variable arguments are aligned to 4 bytes, which matches MSVC. The caller, however, aligns them to the size of the vector argument, which results in runtime failures. This patch fixes the problem by trimming the alignment to 4 bytes for variable arguments on Win32. Fixed vector arguments are passed by pointer on Win32, so they do not have this problem. I could not find documentation on MSDN for this calling convention, so I ran several experiments here: https://godbolt.org/z/n1zn1Gx1z Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D108887
1 parent 68b9d8e commit 9d7d34c

File tree

3 files changed

+61
-1
lines changed

3 files changed

+61
-1
lines changed

llvm/lib/Target/X86/X86CallingConv.td

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@ class CCIfNotSubtarget<string F, CCAction A>
2323
"(State.getMachineFunction().getSubtarget()).", F),
2424
A>;
2525

26+
/// CCIfIsVarArgOnWin - Match if isVarArg in the Windows MSVC environment (used for 32-bit Windows).
27+
class CCIfIsVarArgOnWin<CCAction A>
28+
: CCIf<"State.isVarArg() && "
29+
"State.getMachineFunction().getSubtarget().getTargetTriple()."
30+
"isWindowsMSVCEnvironment()",
31+
A>;
32+
2633
// Register classes for RegCall
2734
class RC_X86_RegCall {
2835
list<Register> GPR_8 = [];
@@ -771,6 +778,22 @@ def CC_X86_32_Vector_Common : CallingConv<[
771778
CCAssignToStack<64, 64>>
772779
]>;
773780

781+
/// CC_X86_Win32_Vector - In X86 Win32 calling conventions, extra vector
782+
/// values are spilled on the stack.
783+
def CC_X86_Win32_Vector : CallingConv<[
784+
// Other SSE vectors get 16-byte stack slots that are 4-byte aligned.
785+
CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
786+
CCAssignToStack<16, 4>>,
787+
788+
// 256-bit AVX vectors get 32-byte stack slots that are 4-byte aligned.
789+
CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
790+
CCAssignToStack<32, 4>>,
791+
792+
// 512-bit AVX-512 vectors get 64-byte stack slots that are 4-byte aligned.
793+
CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
794+
CCAssignToStack<64, 4>>
795+
]>;
796+
774797
// CC_X86_32_Vector_Standard - The first 3 vector arguments are passed in
775798
// vector registers
776799
def CC_X86_32_Vector_Standard : CallingConv<[
@@ -787,6 +810,7 @@ def CC_X86_32_Vector_Standard : CallingConv<[
787810
CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
788811
CCAssignToReg<[ZMM0, ZMM1, ZMM2]>>>,
789812

813+
CCIfIsVarArgOnWin<CCDelegateTo<CC_X86_Win32_Vector>>,
790814
CCDelegateTo<CC_X86_32_Vector_Common>
791815
]>;
792816

llvm/test/CodeGen/X86/vaargs-win32.ll

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
; RUN: llc -mcpu=generic -mtriple=i686-pc-windows-msvc -mattr=+sse < %s | FileCheck %s --check-prefix=MSVC
2+
; RUN: llc -mcpu=generic -mtriple=i686-pc-mingw32 -mattr=+sse < %s | FileCheck %s --check-prefix=MINGW
3+
4+
@a = external dso_local global <4 x float>, align 16
5+
6+
define dso_local void @testPastArguments() nounwind {
7+
; MSVC-LABEL: testPastArguments:
8+
; MSVC: # %bb.0: # %entry
9+
; MSVC-NEXT: subl $20, %esp
10+
; MSVC-NEXT: movaps _a, %xmm0
11+
; MSVC-NEXT: movups %xmm0, 4(%esp)
12+
; MSVC-NEXT: movl $1, (%esp)
13+
; MSVC-NEXT: calll _testm128
14+
; MSVC-NEXT: addl $20, %esp
15+
; MSVC-NEXT: retl
16+
;
17+
; MINGW-LABEL: testPastArguments:
18+
; MINGW: # %bb.0: # %entry
19+
; MINGW-NEXT: pushl %ebp
20+
; MINGW-NEXT: movl %esp, %ebp
21+
; MINGW-NEXT: andl $-16, %esp
22+
; MINGW-NEXT: subl $48, %esp
23+
; MINGW-NEXT: movaps _a, %xmm0
24+
; MINGW-NEXT: movaps %xmm0, 16(%esp)
25+
; MINGW-NEXT: movl $1, (%esp)
26+
; MINGW-NEXT: calll _testm128
27+
; MINGW-NEXT: movl %ebp, %esp
28+
; MINGW-NEXT: popl %ebp
29+
; MINGW-NEXT: retl
30+
entry:
31+
%0 = load <4 x float>, <4 x float>* @a, align 16
32+
%call = tail call i32 (i32, ...) @testm128(i32 1, <4 x float> inreg %0)
33+
ret void
34+
}
35+
36+
declare i32 @testm128(i32, ...) nounwind

llvm/test/CodeGen/X86/win32-spill-xmm.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ declare void @bar(<16 x float> %a, i32 %b)
2020
; Check that proper alignment of spilled vector does not affect vargs
2121

2222
; CHECK-LABEL: vargs_not_affected
23-
; CHECK: movl 28(%ebp), %eax
23+
; CHECK: movl 28(%esp), %eax
2424
define i32 @vargs_not_affected(<4 x float> %v, i8* %f, ...) {
2525
entry:
2626
%ap = alloca i8*, align 4

0 commit comments

Comments
 (0)