Skip to content

Commit a21abc7

Browse files
committed
[X86] Align i128 to 16 bytes in x86 datalayouts
This is an attempt at rebooting https://reviews.llvm.org/D28990. I've included AutoUpgrade changes to modify the data layout to satisfy the compatible layout check. But this does mean alloca, loads, stores, etc. in old IR will automatically get this new alignment. This should fix PR46320. Reviewed By: echristo, rnk, tmgross Differential Revision: https://reviews.llvm.org/D86310
1 parent 84ff49d commit a21abc7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+614
-958
lines changed

clang/lib/Basic/Targets/OSTargets.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -817,10 +817,10 @@ class LLVM_LIBRARY_VISIBILITY NaClTargetInfo : public OSTargetInfo<Target> {
817817
// Handled in ARM's setABI().
818818
} else if (Triple.getArch() == llvm::Triple::x86) {
819819
this->resetDataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-"
820-
"i64:64-n8:16:32-S128");
820+
"i64:64-i128:128-n8:16:32-S128");
821821
} else if (Triple.getArch() == llvm::Triple::x86_64) {
822822
this->resetDataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-"
823-
"i64:64-n8:16:32:64-S128");
823+
"i64:64-i128:128-n8:16:32:64-S128");
824824
} else if (Triple.getArch() == llvm::Triple::mipsel) {
825825
// Handled on mips' setDataLayout.
826826
} else {

clang/lib/Basic/Targets/X86.h

+22-21
Original file line numberDiff line numberDiff line change
@@ -431,13 +431,12 @@ class LLVM_LIBRARY_VISIBILITY X86_32TargetInfo : public X86TargetInfo {
431431
LongDoubleWidth = 96;
432432
LongDoubleAlign = 32;
433433
SuitableAlign = 128;
434-
resetDataLayout(
435-
Triple.isOSBinFormatMachO()
436-
? "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-"
437-
"f80:32-n8:16:32-S128"
438-
: "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-"
439-
"f80:32-n8:16:32-S128",
440-
Triple.isOSBinFormatMachO() ? "_" : "");
434+
resetDataLayout(Triple.isOSBinFormatMachO()
435+
? "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:"
436+
"128-f64:32:64-f80:32-n8:16:32-S128"
437+
: "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:"
438+
"128-f64:32:64-f80:32-n8:16:32-S128",
439+
Triple.isOSBinFormatMachO() ? "_" : "");
441440
SizeType = UnsignedInt;
442441
PtrDiffType = SignedInt;
443442
IntPtrType = SignedInt;
@@ -542,8 +541,9 @@ class LLVM_LIBRARY_VISIBILITY DarwinI386TargetInfo
542541
UseSignedCharForObjCBool = false;
543542
SizeType = UnsignedLong;
544543
IntPtrType = SignedLong;
545-
resetDataLayout("e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-"
546-
"f80:128-n8:16:32-S128", "_");
544+
resetDataLayout("e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-"
545+
"f64:32:64-f80:128-n8:16:32-S128",
546+
"_");
547547
HasAlignMac68kSupport = true;
548548
}
549549

@@ -570,7 +570,7 @@ class LLVM_LIBRARY_VISIBILITY WindowsX86_32TargetInfo
570570
getTriple().isOSWindows() && getTriple().isOSBinFormatCOFF();
571571
bool IsMSVC = getTriple().isWindowsMSVCEnvironment();
572572
std::string Layout = IsWinCOFF ? "e-m:x" : "e-m:e";
573-
Layout += "-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-";
573+
Layout += "-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-";
574574
Layout += IsMSVC ? "f80:128" : "f80:32";
575575
Layout += "-n8:16:32-a:0:32-S32";
576576
resetDataLayout(Layout, IsWinCOFF ? "_" : "");
@@ -621,8 +621,8 @@ class LLVM_LIBRARY_VISIBILITY CygwinX86_32TargetInfo : public X86_32TargetInfo {
621621
: X86_32TargetInfo(Triple, Opts) {
622622
this->WCharType = TargetInfo::UnsignedShort;
623623
DoubleAlign = LongLongAlign = 64;
624-
resetDataLayout("e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:"
625-
"32-n8:16:32-a:0:32-S32",
624+
resetDataLayout("e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-"
625+
"i128:128-f80:32-n8:16:32-a:0:32-S32",
626626
"_");
627627
}
628628

@@ -660,8 +660,8 @@ class LLVM_LIBRARY_VISIBILITY MCUX86_32TargetInfo : public X86_32TargetInfo {
660660
: X86_32TargetInfo(Triple, Opts) {
661661
LongDoubleWidth = 64;
662662
LongDoubleFormat = &llvm::APFloat::IEEEdouble();
663-
resetDataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:32-f64:"
664-
"32-f128:32-n8:16:32-a:0:32-S32");
663+
resetDataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:32-"
664+
"f64:32-f128:32-n8:16:32-a:0:32-S32");
665665
WIntType = UnsignedInt;
666666
}
667667

@@ -721,11 +721,11 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo {
721721

722722
// Pointers are 32-bit in x32.
723723
resetDataLayout(IsX32 ? "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-"
724-
"i64:64-f80:128-n8:16:32:64-S128"
725-
: IsWinCOFF ? "e-m:w-p270:32:32-p271:32:32-p272:64:"
726-
"64-i64:64-f80:128-n8:16:32:64-S128"
727-
: "e-m:e-p270:32:32-p271:32:32-p272:64:"
728-
"64-i64:64-f80:128-n8:16:32:64-S128");
724+
"i64:64-i128:128-f80:128-n8:16:32:64-S128"
725+
: IsWinCOFF ? "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:"
726+
"64-i128:128-f80:128-n8:16:32:64-S128"
727+
: "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:"
728+
"64-i128:128-f80:128-n8:16:32:64-S128");
729729

730730
// Use fpret only for long double.
731731
RealTypeUsesObjCFPRetMask = (unsigned)FloatModeKind::LongDouble;
@@ -922,8 +922,9 @@ class LLVM_LIBRARY_VISIBILITY DarwinX86_64TargetInfo
922922
llvm::Triple T = llvm::Triple(Triple);
923923
if (T.isiOS())
924924
UseSignedCharForObjCBool = false;
925-
resetDataLayout("e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:"
926-
"16:32:64-S128", "_");
925+
resetDataLayout("e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-"
926+
"f80:128-n8:16:32:64-S128",
927+
"_");
927928
}
928929

929930
bool handleTargetFeatures(std::vector<std::string> &Features,

clang/test/CodeGen/target-data.c

+8-8
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,26 @@
11
// RUN: %clang_cc1 -triple i686-unknown-unknown -emit-llvm -o - %s | \
22
// RUN: FileCheck --check-prefix=I686-UNKNOWN %s
3-
// I686-UNKNOWN: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
3+
// I686-UNKNOWN: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128"
44

55
// RUN: %clang_cc1 -triple i686-apple-darwin9 -emit-llvm -o - %s | \
66
// RUN: FileCheck --check-prefix=I686-DARWIN %s
7-
// I686-DARWIN: target datalayout = "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:128-n8:16:32-S128"
7+
// I686-DARWIN: target datalayout = "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:128-n8:16:32-S128"
88

99
// RUN: %clang_cc1 -triple i686-unknown-win32 -emit-llvm -o - %s | \
1010
// RUN: FileCheck --check-prefix=I686-WIN32 %s
11-
// I686-WIN32: target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32-a:0:32-S32"
11+
// I686-WIN32: target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32-a:0:32-S32"
1212

1313
// RUN: %clang_cc1 -triple i686-unknown-cygwin -emit-llvm -o - %s | \
1414
// RUN: FileCheck --check-prefix=I686-CYGWIN %s
15-
// I686-CYGWIN: target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32"
15+
// I686-CYGWIN: target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:32-n8:16:32-a:0:32-S32"
1616

1717
// RUN: %clang_cc1 -triple i686-pc-macho -emit-llvm -o - %s | \
1818
// RUN: FileCheck --check-prefix=I686-MACHO %s
19-
// I686-MACHO: target datalayout = "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
19+
// I686-MACHO: target datalayout = "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128"
2020

2121
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \
2222
// RUN: FileCheck --check-prefix=X86_64 %s
23-
// X86_64: target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
23+
// X86_64: target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
2424

2525
// RUN: %clang_cc1 -triple xcore-unknown-unknown -emit-llvm -o - %s | \
2626
// RUN: FileCheck --check-prefix=XCORE %s
@@ -92,11 +92,11 @@
9292

9393
// RUN: %clang_cc1 -triple i686-nacl -o - -emit-llvm %s | \
9494
// RUN: FileCheck %s -check-prefix=I686-NACL
95-
// I686-NACL: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-n8:16:32-S128"
95+
// I686-NACL: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n8:16:32-S128"
9696

9797
// RUN: %clang_cc1 -triple x86_64-nacl -o - -emit-llvm %s | \
9898
// RUN: FileCheck %s -check-prefix=X86_64-NACL
99-
// X86_64-NACL: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-n8:16:32:64-S128"
99+
// X86_64-NACL: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n8:16:32:64-S128"
100100

101101
// RUN: %clang_cc1 -triple arm-nacl -o - -emit-llvm %s | \
102102
// RUN: FileCheck %s -check-prefix=ARM-NACL

llvm/docs/ReleaseNotes.rst

+5
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,11 @@ Changes to the Windows Target
129129
Changes to the X86 Backend
130130
--------------------------
131131

132+
* The ``i128`` type now matches GCC and clang's ``__int128`` type. This mainly
133+
benefits external projects such as Rust which aim to be binary compatible
134+
with C, but also fixes code generation where LLVM already assumed that the
135+
type matched and called into libgcc helper functions.
136+
132137
Changes to the OCaml bindings
133138
-----------------------------
134139

llvm/lib/IR/AutoUpgrade.cpp

+18-2
Original file line numberDiff line numberDiff line change
@@ -5201,13 +5201,29 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
52015201
// If the datalayout matches the expected format, add pointer size address
52025202
// spaces to the datalayout.
52035203
std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
5204-
if (!DL.contains(AddrSpaces)) {
5204+
if (StringRef Ref = Res; !Ref.contains(AddrSpaces)) {
52055205
SmallVector<StringRef, 4> Groups;
52065206
Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
5207-
if (R.match(DL, &Groups))
5207+
if (R.match(Res, &Groups))
52085208
Res = (Groups[1] + AddrSpaces + Groups[3]).str();
52095209
}
52105210

5211+
// i128 values need to be 16-byte-aligned. LLVM already called into libgcc
5212+
// for i128 operations prior to this being reflected in the data layout, and
5213+
// clang mostly produced LLVM IR that already aligned i128 to 16 byte
5214+
// boundaries, so although this is a breaking change, the upgrade is expected
5215+
// to fix more IR than it breaks.
5216+
// Intel MCU is an exception and uses 4-byte-alignment.
5217+
if (!T.isOSIAMCU()) {
5218+
std::string I128 = "-i128:128";
5219+
if (StringRef Ref = Res; !Ref.contains(I128)) {
5220+
SmallVector<StringRef, 4> Groups;
5221+
Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
5222+
if (R.match(Res, &Groups))
5223+
Res = (Groups[1] + I128 + Groups[3]).str();
5224+
}
5225+
}
5226+
52115227
// For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
52125228
// Raising the alignment is safe because Clang did not produce f80 values in
52135229
// the MSVC environment before this upgrade was added.

llvm/lib/Target/X86/X86TargetMachine.cpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -130,12 +130,14 @@ static std::string computeDataLayout(const Triple &TT) {
130130
Ret += "-p270:32:32-p271:32:32-p272:64:64";
131131

132132
// Some ABIs align 64 bit integers and doubles to 64 bits, others to 32.
133+
// 128 bit integers are not specified in the 32-bit ABIs but are used
134+
// internally for lowering f128, so we match the alignment to that.
133135
if (TT.isArch64Bit() || TT.isOSWindows() || TT.isOSNaCl())
134-
Ret += "-i64:64";
136+
Ret += "-i64:64-i128:128";
135137
else if (TT.isOSIAMCU())
136138
Ret += "-i64:32-f64:32";
137139
else
138-
Ret += "-f64:32:64";
140+
Ret += "-i128:128-f64:32:64";
139141

140142
// Some ABIs align long double to 128 bits, others to 32.
141143
if (TT.isOSNaCl() || TT.isOSIAMCU())

llvm/test/Bitcode/upgrade-datalayout.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@
55
target datalayout = "e-m:e-p:32:32-i64:64-f80:128-n8:16:32:64-S128"
66
target triple = "x86_64-unknown-linux-gnu"
77

8-
; CHECK: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
8+
; CHECK: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
99

llvm/test/Bitcode/upgrade-datalayout2.ll

+6
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22
; match a possible x86 datalayout.
33
;
44
; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s
5+
;
6+
; XFAIL: *
7+
; No implementation of the data layout upgrade ever checked whether the data
8+
; layout was a possible x86 data layout, so the logic that this test aims to
9+
; check was never implemented. We always upgraded data layouts that were not
10+
; possible x86 data layouts, we merely did not previously upgrade this one.
511

612
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
713
target triple = "x86_64-unknown-linux-gnu"

llvm/test/Bitcode/upgrade-datalayout3.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@
55
target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
66
target triple = "i686-pc-windows-msvc"
77

8-
; CHECK: target datalayout = "e-m:w-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32-S32"
8+
; CHECK: target datalayout = "e-m:w-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32-S32"

llvm/test/Bitcode/upgrade-datalayout4.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@
55
target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32"
66
target triple = "i686-pc-windows-msvc"
77

8-
; CHECK: target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32-a:0:32-S32"
8+
; CHECK: target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32-a:0:32-S32"
+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
; Test to make sure datalayout is automatically upgraded.
2+
;
3+
; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s
4+
5+
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
6+
target triple = "i386-pc-linux-gnu"
7+
8+
; CHECK: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128"

llvm/test/CodeGen/X86/AMX/amx-config.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,10 @@ define <4 x i32> @test_api(i32 %0, i16 signext %1, i16 signext %2, <4 x i32> %xm
7979
; AVX1-LABEL: test_api:
8080
; AVX1: # %bb.0:
8181
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
82-
; AVX1-NEXT: movups %xmm1, -{{[0-9]+}}(%rsp)
83-
; AVX1-NEXT: movups %xmm1, -{{[0-9]+}}(%rsp)
84-
; AVX1-NEXT: movups %xmm1, -{{[0-9]+}}(%rsp)
85-
; AVX1-NEXT: movups %xmm1, -{{[0-9]+}}(%rsp)
82+
; AVX1-NEXT: vmovups %xmm1, -{{[0-9]+}}(%rsp)
83+
; AVX1-NEXT: vmovups %xmm1, -{{[0-9]+}}(%rsp)
84+
; AVX1-NEXT: vmovups %xmm1, -{{[0-9]+}}(%rsp)
85+
; AVX1-NEXT: vmovups %xmm1, -{{[0-9]+}}(%rsp)
8686
; AVX1-NEXT: movb $1, -{{[0-9]+}}(%rsp)
8787
; AVX1-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
8888
; AVX1-NEXT: movw %dx, -{{[0-9]+}}(%rsp)

llvm/test/CodeGen/X86/arg-copy-elide.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -186,8 +186,8 @@ define void @split_i128(ptr %sret, i128 %x) {
186186
; CHECK-NEXT: pushl %ebx
187187
; CHECK-NEXT: pushl %edi
188188
; CHECK-NEXT: pushl %esi
189-
; CHECK-NEXT: andl $-8, %esp
190-
; CHECK-NEXT: subl $32, %esp
189+
; CHECK-NEXT: andl $-16, %esp
190+
; CHECK-NEXT: subl $48, %esp
191191
; CHECK-NEXT: movl 12(%ebp), %eax
192192
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
193193
; CHECK-NEXT: movl 16(%ebp), %ebx

llvm/test/CodeGen/X86/atomic-idempotent.ll

+13-22
Original file line numberDiff line numberDiff line change
@@ -182,12 +182,10 @@ define i128 @or128(ptr %p) {
182182
; X86-SSE2-NEXT: .cfi_offset %ebp, -8
183183
; X86-SSE2-NEXT: movl %esp, %ebp
184184
; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
185-
; X86-SSE2-NEXT: pushl %edi
186185
; X86-SSE2-NEXT: pushl %esi
187-
; X86-SSE2-NEXT: andl $-8, %esp
188-
; X86-SSE2-NEXT: subl $16, %esp
189-
; X86-SSE2-NEXT: .cfi_offset %esi, -16
190-
; X86-SSE2-NEXT: .cfi_offset %edi, -12
186+
; X86-SSE2-NEXT: andl $-16, %esp
187+
; X86-SSE2-NEXT: subl $32, %esp
188+
; X86-SSE2-NEXT: .cfi_offset %esi, -12
191189
; X86-SSE2-NEXT: movl 8(%ebp), %esi
192190
; X86-SSE2-NEXT: movl %esp, %eax
193191
; X86-SSE2-NEXT: pushl $0
@@ -198,18 +196,11 @@ define i128 @or128(ptr %p) {
198196
; X86-SSE2-NEXT: pushl %eax
199197
; X86-SSE2-NEXT: calll __sync_fetch_and_or_16
200198
; X86-SSE2-NEXT: addl $20, %esp
201-
; X86-SSE2-NEXT: movl (%esp), %eax
202-
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
203-
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
204-
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
205-
; X86-SSE2-NEXT: movl %edi, 8(%esi)
206-
; X86-SSE2-NEXT: movl %edx, 12(%esi)
207-
; X86-SSE2-NEXT: movl %eax, (%esi)
208-
; X86-SSE2-NEXT: movl %ecx, 4(%esi)
199+
; X86-SSE2-NEXT: movaps (%esp), %xmm0
200+
; X86-SSE2-NEXT: movaps %xmm0, (%esi)
209201
; X86-SSE2-NEXT: movl %esi, %eax
210-
; X86-SSE2-NEXT: leal -8(%ebp), %esp
202+
; X86-SSE2-NEXT: leal -4(%ebp), %esp
211203
; X86-SSE2-NEXT: popl %esi
212-
; X86-SSE2-NEXT: popl %edi
213204
; X86-SSE2-NEXT: popl %ebp
214205
; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
215206
; X86-SSE2-NEXT: retl $4
@@ -223,7 +214,7 @@ define i128 @or128(ptr %p) {
223214
; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
224215
; X86-SLM-NEXT: pushl %edi
225216
; X86-SLM-NEXT: pushl %esi
226-
; X86-SLM-NEXT: andl $-8, %esp
217+
; X86-SLM-NEXT: andl $-16, %esp
227218
; X86-SLM-NEXT: subl $16, %esp
228219
; X86-SLM-NEXT: .cfi_offset %esi, -16
229220
; X86-SLM-NEXT: .cfi_offset %edi, -12
@@ -263,7 +254,7 @@ define i128 @or128(ptr %p) {
263254
; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
264255
; X86-ATOM-NEXT: pushl %edi
265256
; X86-ATOM-NEXT: pushl %esi
266-
; X86-ATOM-NEXT: andl $-8, %esp
257+
; X86-ATOM-NEXT: andl $-16, %esp
267258
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
268259
; X86-ATOM-NEXT: .cfi_offset %esi, -16
269260
; X86-ATOM-NEXT: .cfi_offset %edi, -12
@@ -528,8 +519,8 @@ define void @or128_nouse_seq_cst(ptr %p) {
528519
; X86-SSE2-NEXT: .cfi_offset %ebp, -8
529520
; X86-SSE2-NEXT: movl %esp, %ebp
530521
; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
531-
; X86-SSE2-NEXT: andl $-8, %esp
532-
; X86-SSE2-NEXT: subl $16, %esp
522+
; X86-SSE2-NEXT: andl $-16, %esp
523+
; X86-SSE2-NEXT: subl $32, %esp
533524
; X86-SSE2-NEXT: movl %esp, %eax
534525
; X86-SSE2-NEXT: pushl $0
535526
; X86-SSE2-NEXT: pushl $0
@@ -551,8 +542,8 @@ define void @or128_nouse_seq_cst(ptr %p) {
551542
; X86-SLM-NEXT: .cfi_offset %ebp, -8
552543
; X86-SLM-NEXT: movl %esp, %ebp
553544
; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
554-
; X86-SLM-NEXT: andl $-8, %esp
555-
; X86-SLM-NEXT: subl $16, %esp
545+
; X86-SLM-NEXT: andl $-16, %esp
546+
; X86-SLM-NEXT: subl $32, %esp
556547
; X86-SLM-NEXT: movl 8(%ebp), %eax
557548
; X86-SLM-NEXT: movl %esp, %ecx
558549
; X86-SLM-NEXT: pushl $0
@@ -575,7 +566,7 @@ define void @or128_nouse_seq_cst(ptr %p) {
575566
; X86-ATOM-NEXT: .cfi_offset %ebp, -8
576567
; X86-ATOM-NEXT: leal (%esp), %ebp
577568
; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
578-
; X86-ATOM-NEXT: andl $-8, %esp
569+
; X86-ATOM-NEXT: andl $-16, %esp
579570
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
580571
; X86-ATOM-NEXT: movl 8(%ebp), %eax
581572
; X86-ATOM-NEXT: movl %esp, %ecx

0 commit comments

Comments
 (0)