Commit 586fad5

[X86] Add patterns for using movss/movsd for atomic load/store of f32/f64. Remove atomic fadd pseudos and use isel patterns instead.

This patch adds patterns for turning a bitcasted atomic load/store into movss/sd.

It also removes the pseudo instructions for atomic RMW fadd. Instead it just adds isel patterns for folding an atomic load into addss/sd, and relies on the new movss/sd store pattern to handle the write part. This also makes the fadd patterns use VEX and EVEX instructions when AVX or AVX512F are enabled.

Differential Revision: https://reviews.llvm.org/D60394

llvm-svn: 358215
1 parent f7e548c commit 586fad5
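
For context, the pattern this change targets is what the fadd_32r test in llvm/test/CodeGen/X86/atomic-fp.ll exercises: an atomic integer load bitcast to float, added to, bitcast back, and stored atomically. A minimal sketch of that IR follows; the hunk for this test only shows the first three body lines, so the trailing fadd/bitcast/store/ret lines (and the release ordering on the store) are filled in here as an assumption about how the test continues.

    define void @fadd_32r(float* %loc, float %val) nounwind {
      ; Load the float bits through an i32 atomic load, then reinterpret as float.
      %floc = bitcast float* %loc to i32*
      %1 = load atomic i32, i32* %floc seq_cst, align 4
      %2 = bitcast i32 %1 to float
      ; Assumed continuation: do the FP add and store the bits back atomically.
      %add = fadd float %2, %val
      %3 = bitcast float %add to i32
      store atomic i32 %3, i32* %floc release, align 4
      ret void
    }

With the new isel patterns, the updated CHECK lines below show x86-64 AVX builds selecting the VEX forms directly (vaddss (%rdi), %xmm0, %xmm0 followed by vmovss %xmm0, (%rdi)) instead of going through the RELEASE_FADD32mr pseudo and its custom inserter.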

4 files changed: +130 / -141 lines

llvm/lib/Target/X86/X86ISelLowering.cpp (-51 lines)
@@ -29090,53 +29090,6 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
   return SinkMBB;
 }
 
-MachineBasicBlock *
-X86TargetLowering::EmitLoweredAtomicFP(MachineInstr &MI,
-                                       MachineBasicBlock *BB) const {
-  // Combine the following atomic floating-point modification pattern:
-  //   a.store(reg OP a.load(acquire), release)
-  // Transform them into:
-  //   OPss (%gpr), %xmm
-  //   movss %xmm, (%gpr)
-  // Or sd equivalent for 64-bit operations.
-  unsigned MOp, FOp;
-  switch (MI.getOpcode()) {
-  default: llvm_unreachable("unexpected instr type for EmitLoweredAtomicFP");
-  case X86::RELEASE_FADD32mr:
-    FOp = X86::ADDSSrm;
-    MOp = X86::MOVSSmr;
-    break;
-  case X86::RELEASE_FADD64mr:
-    FOp = X86::ADDSDrm;
-    MOp = X86::MOVSDmr;
-    break;
-  }
-  const X86InstrInfo *TII = Subtarget.getInstrInfo();
-  DebugLoc DL = MI.getDebugLoc();
-  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
-  unsigned ValOpIdx = X86::AddrNumOperands;
-  unsigned VSrc = MI.getOperand(ValOpIdx).getReg();
-  MachineInstrBuilder MIB =
-      BuildMI(*BB, MI, DL, TII->get(FOp),
-              MRI.createVirtualRegister(MRI.getRegClass(VSrc)))
-          .addReg(VSrc);
-  for (int i = 0; i < X86::AddrNumOperands; ++i) {
-    MachineOperand &Operand = MI.getOperand(i);
-    // Clear any kill flags on register operands as we'll create a second
-    // instruction using the same address operands.
-    if (Operand.isReg())
-      Operand.setIsKill(false);
-    MIB.add(Operand);
-  }
-  MachineInstr *FOpMI = MIB;
-  MIB = BuildMI(*BB, MI, DL, TII->get(MOp));
-  for (int i = 0; i < X86::AddrNumOperands; ++i)
-    MIB.add(MI.getOperand(i));
-  MIB.addReg(FOpMI->getOperand(0).getReg(), RegState::Kill);
-  MI.eraseFromParent(); // The pseudo instruction is gone now.
-  return BB;
-}
-
 MachineBasicBlock *
 X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
                                         MachineBasicBlock *BB) const {
@@ -30372,10 +30325,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     return BB;
   }
 
-  case X86::RELEASE_FADD32mr:
-  case X86::RELEASE_FADD64mr:
-    return EmitLoweredAtomicFP(MI, BB);
-
   case X86::FP32_TO_INT16_IN_MEM:
   case X86::FP32_TO_INT32_IN_MEM:
   case X86::FP32_TO_INT64_IN_MEM:

llvm/lib/Target/X86/X86InstrCompiler.td (+51 / -19 lines)
@@ -996,28 +996,31 @@ defm : RELEASE_BINOP_MI<"OR", or>;
 defm : RELEASE_BINOP_MI<"XOR", xor>;
 defm : RELEASE_BINOP_MI<"SUB", sub>;
 
-// Same as above, but for floating-point.
-// FIXME: imm version.
-// FIXME: Version that doesn't clobber $src, using AVX's VADDSS.
+// Atomic load + floating point patterns.
 // FIXME: This could also handle SIMD operations with *ps and *pd instructions.
-let usesCustomInserter = 1, SchedRW = [WriteMicrocoded] in {
-multiclass RELEASE_FP_BINOP_MI<SDNode op> {
-  def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, FR32:$src),
-                    "#BINOP "#NAME#"32mr PSEUDO!",
-                    [(atomic_store_32 addr:$dst,
-                       (i32 (bitconvert (op
-                          (f32 (bitconvert (i32 (atomic_load_32 addr:$dst)))),
-                          FR32:$src))))]>, Requires<[HasSSE1]>;
-  def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, FR64:$src),
-                    "#BINOP "#NAME#"64mr PSEUDO!",
-                    [(atomic_store_64 addr:$dst,
-                       (i64 (bitconvert (op
-                          (f64 (bitconvert (i64 (atomic_load_64 addr:$dst)))),
-                          FR64:$src))))]>, Requires<[HasSSE2]>;
+multiclass ATOMIC_LOAD_FP_BINOP_MI<string Name, SDNode op> {
+  def : Pat<(op FR32:$src1, (bitconvert (i32 (atomic_load_32 addr:$src2)))),
+            (!cast<Instruction>(Name#"SSrm") FR32:$src1, addr:$src2)>,
+            Requires<[UseSSE1]>;
+  def : Pat<(op FR32:$src1, (bitconvert (i32 (atomic_load_32 addr:$src2)))),
+            (!cast<Instruction>("V"#Name#"SSrm") FR32:$src1, addr:$src2)>,
+            Requires<[UseAVX]>;
+  def : Pat<(op FR32X:$src1, (bitconvert (i32 (atomic_load_32 addr:$src2)))),
+            (!cast<Instruction>("V"#Name#"SSZrm") FR32X:$src1, addr:$src2)>,
+            Requires<[HasAVX512]>;
+
+  def : Pat<(op FR64:$src1, (bitconvert (i64 (atomic_load_64 addr:$src2)))),
+            (!cast<Instruction>(Name#"SDrm") FR64:$src1, addr:$src2)>,
+            Requires<[UseSSE1]>;
+  def : Pat<(op FR64:$src1, (bitconvert (i64 (atomic_load_64 addr:$src2)))),
+            (!cast<Instruction>("V"#Name#"SDrm") FR64:$src1, addr:$src2)>,
+            Requires<[UseAVX]>;
+  def : Pat<(op FR64X:$src1, (bitconvert (i64 (atomic_load_64 addr:$src2)))),
+            (!cast<Instruction>("V"#Name#"SDZrm") FR64X:$src1, addr:$src2)>,
+            Requires<[HasAVX512]>;
 }
-defm RELEASE_FADD : RELEASE_FP_BINOP_MI<fadd>;
+defm : ATOMIC_LOAD_FP_BINOP_MI<"ADD", fadd>;
 // FIXME: Add fsub, fmul, fdiv, ...
-}
 
 multiclass RELEASE_UNOP<string Name, dag dag8, dag dag16, dag dag32,
                         dag dag64> {
@@ -1078,6 +1081,35 @@ def : Pat<(i16 (atomic_load_16 addr:$src)), (MOV16rm addr:$src)>;
 def : Pat<(i32 (atomic_load_32 addr:$src)), (MOV32rm addr:$src)>;
 def : Pat<(i64 (atomic_load_64 addr:$src)), (MOV64rm addr:$src)>;
 
+// Floating point loads/stores.
+def : Pat<(atomic_store_32 addr:$dst, (i32 (bitconvert (f32 FR32:$src)))),
+          (MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;
+def : Pat<(atomic_store_32 addr:$dst, (i32 (bitconvert (f32 FR32:$src)))),
+          (VMOVSSmr addr:$dst, FR32:$src)>, Requires<[UseAVX]>;
+def : Pat<(atomic_store_32 addr:$dst, (i32 (bitconvert (f32 FR32:$src)))),
+          (VMOVSSZmr addr:$dst, FR32:$src)>, Requires<[HasAVX512]>;
+
+def : Pat<(atomic_store_64 addr:$dst, (i64 (bitconvert (f64 FR64:$src)))),
+          (MOVSDmr addr:$dst, FR64:$src)>, Requires<[UseSSE2]>;
+def : Pat<(atomic_store_64 addr:$dst, (i64 (bitconvert (f64 FR64:$src)))),
+          (VMOVSDmr addr:$dst, FR64:$src)>, Requires<[UseAVX]>;
+def : Pat<(atomic_store_64 addr:$dst, (i64 (bitconvert (f64 FR64:$src)))),
+          (VMOVSDmr addr:$dst, FR64:$src)>, Requires<[HasAVX512]>;
+
+def : Pat<(f32 (bitconvert (i32 (atomic_load_32 addr:$src)))),
+          (MOVSSrm addr:$src)>, Requires<[UseSSE1]>;
+def : Pat<(f32 (bitconvert (i32 (atomic_load_32 addr:$src)))),
+          (VMOVSSrm addr:$src)>, Requires<[UseAVX]>;
+def : Pat<(f32 (bitconvert (i32 (atomic_load_32 addr:$src)))),
+          (VMOVSSZrm addr:$src)>, Requires<[HasAVX512]>;
+
+def : Pat<(f64 (bitconvert (i64 (atomic_load_64 addr:$src)))),
+          (MOVSDrm addr:$src)>, Requires<[UseSSE2]>;
+def : Pat<(f64 (bitconvert (i64 (atomic_load_64 addr:$src)))),
+          (VMOVSDrm addr:$src)>, Requires<[UseAVX]>;
+def : Pat<(f64 (bitconvert (i64 (atomic_load_64 addr:$src)))),
+          (VMOVSDZrm addr:$src)>, Requires<[HasAVX512]>;
+
 //===----------------------------------------------------------------------===//
 // DAG Pattern Matching Rules
 //===----------------------------------------------------------------------===//

llvm/test/CodeGen/X86/atomic-fp.ll (+59 / -41 lines)
@@ -41,25 +41,31 @@ define void @fadd_32r(float* %loc, float %val) nounwind {
 ;
 ; X86-SSE2-LABEL: fadd_32r:
 ; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X86-SSE2-NEXT: addss (%eax), %xmm0
 ; X86-SSE2-NEXT: movss %xmm0, (%eax)
 ; X86-SSE2-NEXT: retl
 ;
 ; X86-AVX-LABEL: fadd_32r:
 ; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-AVX-NEXT: addss (%eax), %xmm0
-; X86-AVX-NEXT: movss %xmm0, (%eax)
+; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vaddss (%eax), %xmm0, %xmm0
+; X86-AVX-NEXT: vmovss %xmm0, (%eax)
 ; X86-AVX-NEXT: retl
 ;
-; X64-LABEL: fadd_32r:
-; X64: # %bb.0:
-; X64-NEXT: addss (%rdi), %xmm0
-; X64-NEXT: movss %xmm0, (%rdi)
-; X64-NEXT: retq
+; X64-SSE-LABEL: fadd_32r:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: addss (%rdi), %xmm0
+; X64-SSE-NEXT: movss %xmm0, (%rdi)
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: fadd_32r:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vaddss (%rdi), %xmm0, %xmm0
+; X64-AVX-NEXT: vmovss %xmm0, (%rdi)
+; X64-AVX-NEXT: retq
   %floc = bitcast float* %loc to i32*
   %1 = load atomic i32, i32* %floc seq_cst, align 4
   %2 = bitcast i32 %1 to float
@@ -194,11 +200,17 @@ define void @fadd_64r(double* %loc, double %val) nounwind {
 ; X86-AVX-NEXT: popl %ebp
 ; X86-AVX-NEXT: retl
 ;
-; X64-LABEL: fadd_64r:
-; X64: # %bb.0:
-; X64-NEXT: addsd (%rdi), %xmm0
-; X64-NEXT: movsd %xmm0, (%rdi)
-; X64-NEXT: retq
+; X64-SSE-LABEL: fadd_64r:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: addsd (%rdi), %xmm0
+; X64-SSE-NEXT: movsd %xmm0, (%rdi)
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: fadd_64r:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vaddsd (%rdi), %xmm0, %xmm0
+; X64-AVX-NEXT: vmovsd %xmm0, (%rdi)
+; X64-AVX-NEXT: retq
   %floc = bitcast double* %loc to i64*
   %1 = load atomic i64, i64* %floc seq_cst, align 8
   %2 = bitcast i64 %1 to double
@@ -249,8 +261,8 @@ define void @fadd_32g() nounwind {
 ; X86-AVX-LABEL: fadd_32g:
 ; X86-AVX: # %bb.0:
 ; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-AVX-NEXT: addss glob32, %xmm0
-; X86-AVX-NEXT: movss %xmm0, glob32
+; X86-AVX-NEXT: vaddss glob32, %xmm0, %xmm0
+; X86-AVX-NEXT: vmovss %xmm0, glob32
 ; X86-AVX-NEXT: retl
 ;
 ; X64-SSE-LABEL: fadd_32g:
@@ -263,8 +275,8 @@ define void @fadd_32g() nounwind {
 ; X64-AVX-LABEL: fadd_32g:
 ; X64-AVX: # %bb.0:
 ; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-AVX-NEXT: addss {{.*}}(%rip), %xmm0
-; X64-AVX-NEXT: movss %xmm0, {{.*}}(%rip)
+; X64-AVX-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vmovss %xmm0, {{.*}}(%rip)
 ; X64-AVX-NEXT: retq
   %i = load atomic i32, i32* bitcast (float* @glob32 to i32*) monotonic, align 4
   %f = bitcast i32 %i to float
@@ -397,8 +409,8 @@ define void @fadd_64g() nounwind {
 ; X64-AVX-LABEL: fadd_64g:
 ; X64-AVX: # %bb.0:
 ; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X64-AVX-NEXT: addsd {{.*}}(%rip), %xmm0
-; X64-AVX-NEXT: movsd %xmm0, {{.*}}(%rip)
+; X64-AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vmovsd %xmm0, {{.*}}(%rip)
 ; X64-AVX-NEXT: retq
   %i = load atomic i64, i64* bitcast (double* @glob64 to i64*) monotonic, align 8
   %f = bitcast i64 %i to double
@@ -446,24 +458,24 @@ define void @fadd_32imm() nounwind {
 ; X86-AVX-LABEL: fadd_32imm:
 ; X86-AVX: # %bb.0:
 ; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-AVX-NEXT: addss -559038737, %xmm0
-; X86-AVX-NEXT: movss %xmm0, -559038737
+; X86-AVX-NEXT: vaddss -559038737, %xmm0, %xmm0
+; X86-AVX-NEXT: vmovss %xmm0, -559038737
 ; X86-AVX-NEXT: retl
 ;
 ; X64-SSE-LABEL: fadd_32imm:
 ; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
+; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X64-SSE-NEXT: addss (%rax), %xmm0
 ; X64-SSE-NEXT: movss %xmm0, (%rax)
 ; X64-SSE-NEXT: retq
 ;
 ; X64-AVX-LABEL: fadd_32imm:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
-; X64-AVX-NEXT: addss (%rax), %xmm0
-; X64-AVX-NEXT: movss %xmm0, (%rax)
+; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vaddss (%rax), %xmm0, %xmm0
+; X64-AVX-NEXT: vmovss %xmm0, (%rax)
 ; X64-AVX-NEXT: retq
   %i = load atomic i32, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4
   %f = bitcast i32 %i to float
@@ -588,18 +600,18 @@ define void @fadd_64imm() nounwind {
 ;
 ; X64-SSE-LABEL: fadd_64imm:
 ; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
 ; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
+; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
 ; X64-SSE-NEXT: addsd (%rax), %xmm0
 ; X64-SSE-NEXT: movsd %xmm0, (%rax)
 ; X64-SSE-NEXT: retq
 ;
 ; X64-AVX-LABEL: fadd_64imm:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
-; X64-AVX-NEXT: addsd (%rax), %xmm0
-; X64-AVX-NEXT: movsd %xmm0, (%rax)
+; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-AVX-NEXT: vaddsd (%rax), %xmm0, %xmm0
+; X64-AVX-NEXT: vmovsd %xmm0, (%rax)
 ; X64-AVX-NEXT: retq
   %i = load atomic i64, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8
   %f = bitcast i64 %i to double
@@ -650,8 +662,8 @@ define void @fadd_32stack() nounwind {
 ; X86-AVX: # %bb.0:
 ; X86-AVX-NEXT: pushl %eax
 ; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-AVX-NEXT: addss (%esp), %xmm0
-; X86-AVX-NEXT: movss %xmm0, (%esp)
+; X86-AVX-NEXT: vaddss (%esp), %xmm0, %xmm0
+; X86-AVX-NEXT: vmovss %xmm0, (%esp)
 ; X86-AVX-NEXT: popl %eax
 ; X86-AVX-NEXT: retl
 ;
@@ -665,8 +677,8 @@ define void @fadd_32stack() nounwind {
 ; X64-AVX-LABEL: fadd_32stack:
 ; X64-AVX: # %bb.0:
 ; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-AVX-NEXT: addss -{{[0-9]+}}(%rsp), %xmm0
-; X64-AVX-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX-NEXT: vaddss -{{[0-9]+}}(%rsp), %xmm0, %xmm0
+; X64-AVX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-AVX-NEXT: retq
   %ptr = alloca i32, align 4
   %bc3 = bitcast i32* %ptr to float*
@@ -801,8 +813,8 @@ define void @fadd_64stack() nounwind {
 ; X64-AVX-LABEL: fadd_64stack:
 ; X64-AVX: # %bb.0:
 ; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X64-AVX-NEXT: addsd -{{[0-9]+}}(%rsp), %xmm0
-; X64-AVX-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX-NEXT: vaddsd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
+; X64-AVX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-AVX-NEXT: retq
   %ptr = alloca i64, align 8
   %bc3 = bitcast i64* %ptr to double*
@@ -951,11 +963,17 @@ define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) nounwind {
 ; X86-AVX-NEXT: popl %ebp
 ; X86-AVX-NEXT: retl
 ;
-; X64-LABEL: fadd_array:
-; X64: # %bb.0: # %bb
-; X64-NEXT: addsd (%rdi,%rsi,8), %xmm0
-; X64-NEXT: movsd %xmm0, (%rdi,%rsi,8)
-; X64-NEXT: retq
+; X64-SSE-LABEL: fadd_array:
+; X64-SSE: # %bb.0: # %bb
+; X64-SSE-NEXT: addsd (%rdi,%rsi,8), %xmm0
+; X64-SSE-NEXT: movsd %xmm0, (%rdi,%rsi,8)
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: fadd_array:
+; X64-AVX: # %bb.0: # %bb
+; X64-AVX-NEXT: vaddsd (%rdi,%rsi,8), %xmm0, %xmm0
+; X64-AVX-NEXT: vmovsd %xmm0, (%rdi,%rsi,8)
+; X64-AVX-NEXT: retq
 bb:
   %tmp4 = getelementptr inbounds i64, i64* %arg, i64 %arg2
   %tmp6 = load atomic i64, i64* %tmp4 monotonic, align 8
