Skip to content

Commit f7e548c

Browse files
committed
Recommit r358211 "[X86] Use FILD/FIST to implement i64 atomic load on 32-bit targets with X87, but no SSE2"
With correct test checks this time. If we have X87, but not SSE2, we can atomically load an i64 value into the significand of an 80-bit extended precision x87 register using fild. We can then use a fist instruction to convert it back to an i64 integer and store it to a stack temporary. This matches what gcc and icc do for this case and removes an existing FIXME. llvm-svn: 358214
1 parent 1fefee6 commit f7e548c

8 files changed

+428
-396
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 53 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -25584,17 +25584,18 @@ bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
2558425584

2558525585
// Note: this turns large loads into lock cmpxchg8b/16b.
2558625586
// TODO: In 32-bit mode, use MOVLPS when SSE1 is available?
25587-
// TODO: In 32-bit mode, use FILD/FISTP when X87 is available?
2558825587
TargetLowering::AtomicExpansionKind
2558925588
X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
2559025589
Type *MemType = LI->getType();
2559125590

2559225591
// If this a 64 bit atomic load on a 32-bit target and SSE2 is enabled, we
25593-
// can use movq to do the load.
25592+
// can use movq to do the load. If we have X87 we can load into an 80-bit
25593+
// X87 register and store it to a stack temporary.
2559425594
bool NoImplicitFloatOps =
2559525595
LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
2559625596
if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
25597-
!Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2())
25597+
!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
25598+
(Subtarget.hasSSE2() || Subtarget.hasX87()))
2559825599
return AtomicExpansionKind::None;
2559925600

2560025601
return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
@@ -27440,23 +27441,57 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
2744027441
bool NoImplicitFloatOps =
2744127442
DAG.getMachineFunction().getFunction().hasFnAttribute(
2744227443
Attribute::NoImplicitFloat);
27443-
if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
27444-
Subtarget.hasSSE2()) {
27444+
if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
2744527445
auto *Node = cast<AtomicSDNode>(N);
27446-
// Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the lower
27447-
// 64-bits.
27448-
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
27449-
SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
27450-
SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
27451-
MVT::i64, Node->getMemOperand());
27452-
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
27453-
DAG.getIntPtrConstant(0, dl));
27454-
Results.push_back(Res);
27455-
Results.push_back(Ld.getValue(1));
27456-
return;
27446+
if (Subtarget.hasSSE2()) {
27447+
// Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the
27448+
// lower 64-bits.
27449+
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
27450+
SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
27451+
SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
27452+
MVT::i64, Node->getMemOperand());
27453+
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
27454+
DAG.getIntPtrConstant(0, dl));
27455+
Results.push_back(Res);
27456+
Results.push_back(Ld.getValue(1));
27457+
return;
27458+
}
27459+
if (Subtarget.hasX87()) {
27460+
// First load this into an 80-bit X87 register. This will put the whole
27461+
// integer into the significand.
27462+
// FIXME: Do we need to glue? See FIXME comment in BuildFILD.
27463+
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other, MVT::Glue);
27464+
SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
27465+
SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD_FLAG,
27466+
dl, Tys, Ops, MVT::i64,
27467+
Node->getMemOperand());
27468+
SDValue Chain = Result.getValue(1);
27469+
SDValue InFlag = Result.getValue(2);
27470+
27471+
// Now store the X87 register to a stack temporary and convert to i64.
27472+
// This store is not atomic and doesn't need to be.
27473+
// FIXME: We don't need a stack temporary if the result of the load
27474+
// is already being stored. We could just directly store there.
27475+
SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
27476+
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
27477+
MachinePointerInfo MPI =
27478+
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
27479+
SDValue StoreOps[] = { Chain, Result, StackPtr, InFlag };
27480+
Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, dl,
27481+
DAG.getVTList(MVT::Other), StoreOps,
27482+
MVT::i64, MPI, 0 /*Align*/,
27483+
MachineMemOperand::MOStore);
27484+
27485+
// Finally load the value back from the stack temporary and return it.
27486+
// This load is not atomic and doesn't need to be.
27487+
// This load will be further type legalized.
27488+
Result = DAG.getLoad(MVT::i64, dl, Chain, StackPtr, MPI);
27489+
Results.push_back(Result);
27490+
Results.push_back(Result.getValue(1));
27491+
return;
27492+
}
2745727493
}
2745827494
// TODO: Use MOVLPS when SSE1 is available?
27459-
// TODO: Use FILD/FISTP when X87 is available?
2746027495
// Delegate to generic TypeLegalization. Situations we can really handle
2746127496
// should have already been dealt with by AtomicExpandPass.cpp.
2746227497
break;
@@ -27649,6 +27684,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
2764927684
case X86ISD::FXOR: return "X86ISD::FXOR";
2765027685
case X86ISD::FILD: return "X86ISD::FILD";
2765127686
case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
27687+
case X86ISD::FIST: return "X86ISD::FIST";
2765227688
case X86ISD::FP_TO_INT_IN_MEM: return "X86ISD::FP_TO_INT_IN_MEM";
2765327689
case X86ISD::FLD: return "X86ISD::FLD";
2765427690
case X86ISD::FST: return "X86ISD::FST";

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -608,16 +608,22 @@ namespace llvm {
608608
FILD,
609609
FILD_FLAG,
610610

611+
/// This instruction implements a fp->int store from FP stack
612+
/// slots. This corresponds to the fist instruction. It takes a
613+
/// chain operand, value to store, address, and glue. The memory VT
614+
/// specifies the type to store as.
615+
FIST,
616+
611617
/// This instruction implements an extending load to FP stack slots.
612618
/// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
613619
/// operand, and ptr to load from. The memory VT specifies the type to
614620
/// load from.
615621
FLD,
616622

617-
/// This instruction implements a truncating store to FP stack
623+
/// This instruction implements a truncating store from FP stack
618624
/// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
619-
/// chain operand, value to store, and address. The memory VT specifies
620-
/// the type to store as.
625+
/// chain operand, value to store, address, and glue. The memory VT
626+
/// specifies the type to store as.
621627
FST,
622628

623629
/// This instruction grabs the address of the next argument

llvm/lib/Target/X86/X86InstrFPStack.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ def SDTX86Fld : SDTypeProfile<1, 1, [SDTCisFP<0>,
2121
def SDTX86Fst : SDTypeProfile<0, 2, [SDTCisFP<0>,
2222
SDTCisPtrTy<1>]>;
2323
def SDTX86Fild : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
24+
def SDTX86Fist : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
2425
def SDTX86Fnstsw : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
2526

2627
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
@@ -35,6 +36,9 @@ def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
3536
def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
3637
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
3738
SDNPMemOperand]>;
39+
def X86fist : SDNode<"X86ISD::FIST", SDTX86Fist,
40+
[SDNPHasChain, SDNPInGlue, SDNPMayStore,
41+
SDNPMemOperand]>;
3842
def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
3943
def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst,
4044
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -79,6 +83,11 @@ def X86fildflag64 : PatFrag<(ops node:$ptr), (X86fildflag node:$ptr), [{
7983
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
8084
}]>;
8185

86+
def X86fist64 : PatFrag<(ops node:$val, node:$ptr),
87+
(X86fist node:$val, node:$ptr), [{
88+
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
89+
}]>;
90+
8291
def X86fp_to_i16mem : PatFrag<(ops node:$val, node:$ptr),
8392
(X86fp_to_mem node:$val, node:$ptr), [{
8493
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
@@ -760,6 +769,10 @@ def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
760769
// Used to conv. i64 to f64 since there isn't a SSE version.
761770
def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m64 addr:$src)>;
762771

772+
// Used to conv. between f80 and i64 for i64 atomic loads.
773+
def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m80 addr:$src)>;
774+
def : Pat<(X86fist64 RFP80:$src, addr:$op), (IST_Fp64m80 addr:$op, RFP80:$src)>;
775+
763776
// FP extensions map onto simple pseudo-value conversions if they are to/from
764777
// the FP stack.
765778
def : Pat<(f64 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,

llvm/test/CodeGen/X86/atomic-fp.ll

Lines changed: 60 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,13 @@ define void @fadd_64r(double* %loc, double %val) nounwind {
7777
; X86-NOSSE-NEXT: pushl %ebx
7878
; X86-NOSSE-NEXT: pushl %esi
7979
; X86-NOSSE-NEXT: andl $-8, %esp
80-
; X86-NOSSE-NEXT: subl $16, %esp
80+
; X86-NOSSE-NEXT: subl $24, %esp
8181
; X86-NOSSE-NEXT: movl 8(%ebp), %esi
82-
; X86-NOSSE-NEXT: xorl %eax, %eax
83-
; X86-NOSSE-NEXT: xorl %edx, %edx
84-
; X86-NOSSE-NEXT: xorl %ecx, %ecx
85-
; X86-NOSSE-NEXT: xorl %ebx, %ebx
86-
; X86-NOSSE-NEXT: lock cmpxchg8b (%esi)
87-
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
82+
; X86-NOSSE-NEXT: fildll (%esi)
83+
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
84+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
85+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
86+
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
8887
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
8988
; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
9089
; X86-NOSSE-NEXT: faddl 12(%ebp)
@@ -112,14 +111,13 @@ define void @fadd_64r(double* %loc, double %val) nounwind {
112111
; X86-SSE1-NEXT: pushl %ebx
113112
; X86-SSE1-NEXT: pushl %esi
114113
; X86-SSE1-NEXT: andl $-8, %esp
115-
; X86-SSE1-NEXT: subl $16, %esp
114+
; X86-SSE1-NEXT: subl $24, %esp
116115
; X86-SSE1-NEXT: movl 8(%ebp), %esi
117-
; X86-SSE1-NEXT: xorl %eax, %eax
118-
; X86-SSE1-NEXT: xorl %edx, %edx
119-
; X86-SSE1-NEXT: xorl %ecx, %ecx
120-
; X86-SSE1-NEXT: xorl %ebx, %ebx
121-
; X86-SSE1-NEXT: lock cmpxchg8b (%esi)
122-
; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp)
116+
; X86-SSE1-NEXT: fildll (%esi)
117+
; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
118+
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
119+
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
120+
; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
123121
; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
124122
; X86-SSE1-NEXT: fldl {{[0-9]+}}(%esp)
125123
; X86-SSE1-NEXT: faddl 12(%ebp)
@@ -283,13 +281,12 @@ define void @fadd_64g() nounwind {
283281
; X86-NOSSE-NEXT: movl %esp, %ebp
284282
; X86-NOSSE-NEXT: pushl %ebx
285283
; X86-NOSSE-NEXT: andl $-8, %esp
286-
; X86-NOSSE-NEXT: subl $24, %esp
287-
; X86-NOSSE-NEXT: xorl %eax, %eax
288-
; X86-NOSSE-NEXT: xorl %edx, %edx
289-
; X86-NOSSE-NEXT: xorl %ecx, %ecx
290-
; X86-NOSSE-NEXT: xorl %ebx, %ebx
291-
; X86-NOSSE-NEXT: lock cmpxchg8b glob64
292-
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
284+
; X86-NOSSE-NEXT: subl $32, %esp
285+
; X86-NOSSE-NEXT: fildll glob64
286+
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
287+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
288+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
289+
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
293290
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
294291
; X86-NOSSE-NEXT: fld1
295292
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
@@ -315,13 +312,12 @@ define void @fadd_64g() nounwind {
315312
; X86-SSE1-NEXT: movl %esp, %ebp
316313
; X86-SSE1-NEXT: pushl %ebx
317314
; X86-SSE1-NEXT: andl $-8, %esp
318-
; X86-SSE1-NEXT: subl $24, %esp
319-
; X86-SSE1-NEXT: xorl %eax, %eax
320-
; X86-SSE1-NEXT: xorl %edx, %edx
321-
; X86-SSE1-NEXT: xorl %ecx, %ecx
322-
; X86-SSE1-NEXT: xorl %ebx, %ebx
323-
; X86-SSE1-NEXT: lock cmpxchg8b glob64
324-
; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp)
315+
; X86-SSE1-NEXT: subl $32, %esp
316+
; X86-SSE1-NEXT: fildll glob64
317+
; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
318+
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
319+
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
320+
; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
325321
; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
326322
; X86-SSE1-NEXT: fld1
327323
; X86-SSE1-NEXT: faddl {{[0-9]+}}(%esp)
@@ -484,13 +480,12 @@ define void @fadd_64imm() nounwind {
484480
; X86-NOSSE-NEXT: movl %esp, %ebp
485481
; X86-NOSSE-NEXT: pushl %ebx
486482
; X86-NOSSE-NEXT: andl $-8, %esp
487-
; X86-NOSSE-NEXT: subl $24, %esp
488-
; X86-NOSSE-NEXT: xorl %eax, %eax
489-
; X86-NOSSE-NEXT: xorl %edx, %edx
490-
; X86-NOSSE-NEXT: xorl %ecx, %ecx
491-
; X86-NOSSE-NEXT: xorl %ebx, %ebx
492-
; X86-NOSSE-NEXT: lock cmpxchg8b -559038737
493-
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
483+
; X86-NOSSE-NEXT: subl $32, %esp
484+
; X86-NOSSE-NEXT: fildll -559038737
485+
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
486+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
487+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
488+
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
494489
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
495490
; X86-NOSSE-NEXT: fld1
496491
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
@@ -516,13 +511,12 @@ define void @fadd_64imm() nounwind {
516511
; X86-SSE1-NEXT: movl %esp, %ebp
517512
; X86-SSE1-NEXT: pushl %ebx
518513
; X86-SSE1-NEXT: andl $-8, %esp
519-
; X86-SSE1-NEXT: subl $24, %esp
520-
; X86-SSE1-NEXT: xorl %eax, %eax
521-
; X86-SSE1-NEXT: xorl %edx, %edx
522-
; X86-SSE1-NEXT: xorl %ecx, %ecx
523-
; X86-SSE1-NEXT: xorl %ebx, %ebx
524-
; X86-SSE1-NEXT: lock cmpxchg8b -559038737
525-
; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp)
514+
; X86-SSE1-NEXT: subl $32, %esp
515+
; X86-SSE1-NEXT: fildll -559038737
516+
; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
517+
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
518+
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
519+
; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
526520
; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
527521
; X86-SSE1-NEXT: fld1
528522
; X86-SSE1-NEXT: faddl {{[0-9]+}}(%esp)
@@ -691,13 +685,12 @@ define void @fadd_64stack() nounwind {
691685
; X86-NOSSE-NEXT: movl %esp, %ebp
692686
; X86-NOSSE-NEXT: pushl %ebx
693687
; X86-NOSSE-NEXT: andl $-8, %esp
694-
; X86-NOSSE-NEXT: subl $32, %esp
695-
; X86-NOSSE-NEXT: xorl %eax, %eax
696-
; X86-NOSSE-NEXT: xorl %edx, %edx
697-
; X86-NOSSE-NEXT: xorl %ecx, %ecx
698-
; X86-NOSSE-NEXT: xorl %ebx, %ebx
699-
; X86-NOSSE-NEXT: lock cmpxchg8b (%esp)
700-
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
688+
; X86-NOSSE-NEXT: subl $40, %esp
689+
; X86-NOSSE-NEXT: fildll (%esp)
690+
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
691+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
692+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
693+
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
701694
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
702695
; X86-NOSSE-NEXT: fld1
703696
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
@@ -723,13 +716,12 @@ define void @fadd_64stack() nounwind {
723716
; X86-SSE1-NEXT: movl %esp, %ebp
724717
; X86-SSE1-NEXT: pushl %ebx
725718
; X86-SSE1-NEXT: andl $-8, %esp
726-
; X86-SSE1-NEXT: subl $32, %esp
727-
; X86-SSE1-NEXT: xorl %eax, %eax
728-
; X86-SSE1-NEXT: xorl %edx, %edx
729-
; X86-SSE1-NEXT: xorl %ecx, %ecx
730-
; X86-SSE1-NEXT: xorl %ebx, %ebx
731-
; X86-SSE1-NEXT: lock cmpxchg8b (%esp)
732-
; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp)
719+
; X86-SSE1-NEXT: subl $40, %esp
720+
; X86-SSE1-NEXT: fildll (%esp)
721+
; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
722+
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
723+
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
724+
; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
733725
; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
734726
; X86-SSE1-NEXT: fld1
735727
; X86-SSE1-NEXT: faddl {{[0-9]+}}(%esp)
@@ -831,15 +823,14 @@ define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) nounwind {
831823
; X86-NOSSE-NEXT: pushl %edi
832824
; X86-NOSSE-NEXT: pushl %esi
833825
; X86-NOSSE-NEXT: andl $-8, %esp
834-
; X86-NOSSE-NEXT: subl $24, %esp
826+
; X86-NOSSE-NEXT: subl $32, %esp
835827
; X86-NOSSE-NEXT: movl 20(%ebp), %esi
836828
; X86-NOSSE-NEXT: movl 8(%ebp), %edi
837-
; X86-NOSSE-NEXT: xorl %eax, %eax
838-
; X86-NOSSE-NEXT: xorl %edx, %edx
839-
; X86-NOSSE-NEXT: xorl %ecx, %ecx
840-
; X86-NOSSE-NEXT: xorl %ebx, %ebx
841-
; X86-NOSSE-NEXT: lock cmpxchg8b (%edi,%esi,8)
842-
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
829+
; X86-NOSSE-NEXT: fildll (%edi,%esi,8)
830+
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
831+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
832+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
833+
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
843834
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
844835
; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
845836
; X86-NOSSE-NEXT: faddl 12(%ebp)
@@ -869,15 +860,14 @@ define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) nounwind {
869860
; X86-SSE1-NEXT: pushl %edi
870861
; X86-SSE1-NEXT: pushl %esi
871862
; X86-SSE1-NEXT: andl $-8, %esp
872-
; X86-SSE1-NEXT: subl $24, %esp
863+
; X86-SSE1-NEXT: subl $32, %esp
873864
; X86-SSE1-NEXT: movl 20(%ebp), %esi
874865
; X86-SSE1-NEXT: movl 8(%ebp), %edi
875-
; X86-SSE1-NEXT: xorl %eax, %eax
876-
; X86-SSE1-NEXT: xorl %edx, %edx
877-
; X86-SSE1-NEXT: xorl %ecx, %ecx
878-
; X86-SSE1-NEXT: xorl %ebx, %ebx
879-
; X86-SSE1-NEXT: lock cmpxchg8b (%edi,%esi,8)
880-
; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp)
866+
; X86-SSE1-NEXT: fildll (%edi,%esi,8)
867+
; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
868+
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
869+
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
870+
; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
881871
; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
882872
; X86-SSE1-NEXT: fldl {{[0-9]+}}(%esp)
883873
; X86-SSE1-NEXT: faddl 12(%ebp)

0 commit comments

Comments
 (0)