-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[X86] Support ATOMIC_LOAD_FP_BINOP_MI for other binops #87524
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 Author: AtariDreams (AtariDreams) Changes: Since we can bitcast and then do the same thing sub does in the table section above, I figured it was trivial to add fsub, fmul, and fdiv. Patch is 72.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/87524.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index ce3b6af4cab47b..ba20954cb779b3 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1125,7 +1125,10 @@ multiclass ATOMIC_LOAD_FP_BINOP_MI<string Name, SDNode op> {
Requires<[HasAVX512]>;
}
defm : ATOMIC_LOAD_FP_BINOP_MI<"ADD", fadd>;
-// FIXME: Add fsub, fmul, fdiv, ...
+defm : ATOMIC_LOAD_FP_BINOP_MI<"SUB", fsub>;
+defm : ATOMIC_LOAD_FP_BINOP_MI<"MUL", fmul>;
+defm : ATOMIC_LOAD_FP_BINOP_MI<"DIV", fdiv>;
+// FIXME: Add fcomi, fucomi, ...
multiclass RELEASE_UNOP<string Name, dag dag8, dag dag16, dag dag32,
dag dag64> {
diff --git a/llvm/test/CodeGen/X86/atomic-fp.ll b/llvm/test/CodeGen/X86/atomic-fp.ll
index 1094edd19af438..125d19b75726bc 100644
--- a/llvm/test/CodeGen/X86/atomic-fp.ll
+++ b/llvm/test/CodeGen/X86/atomic-fp.ll
@@ -777,3 +777,2053 @@ bb:
store atomic i64 %tmp9, ptr %tmp4 monotonic, align 8
ret void
}
+
+; ----- FSUB -----
+
+define dso_local void @fsub_32r(ptr %loc, float %val) nounwind {
+; X86-NOSSE-LABEL: fsub_32r:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: subl $8, %esp
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl (%eax), %ecx
+; X86-NOSSE-NEXT: movl %ecx, (%esp)
+; X86-NOSSE-NEXT: flds (%esp)
+; X86-NOSSE-NEXT: fsubs {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl %ecx, (%eax)
+; X86-NOSSE-NEXT: addl $8, %esp
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE1-LABEL: fsub_32r:
+; X86-SSE1: # %bb.0:
+; X86-SSE1-NEXT: subl $8, %esp
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT: movl (%eax), %ecx
+; X86-SSE1-NEXT: movl %ecx, (%esp)
+; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT: subss {{[0-9]+}}(%esp), %xmm0
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE1-NEXT: movl %ecx, (%eax)
+; X86-SSE1-NEXT: addl $8, %esp
+; X86-SSE1-NEXT: retl
+;
+; X86-SSE2-LABEL: fsub_32r:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: subss {{[0-9]+}}(%esp), %xmm0
+; X86-SSE2-NEXT: movss %xmm0, (%eax)
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX-LABEL: fsub_32r:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vsubss {{[0-9]+}}(%esp), %xmm0, %xmm0
+; X86-AVX-NEXT: vmovss %xmm0, (%eax)
+; X86-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: fsub_32r:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-SSE-NEXT: subss %xmm0, %xmm1
+; X64-SSE-NEXT: movss %xmm1, (%rdi)
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: fsub_32r:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: vmovss %xmm0, (%rdi)
+; X64-AVX-NEXT: retq
+ %1 = load atomic i32, ptr %loc seq_cst, align 4
+ %2 = bitcast i32 %1 to float
+ %sub = fsub float %2, %val
+ %3 = bitcast float %sub to i32
+ store atomic i32 %3, ptr %loc release, align 4
+ ret void
+}
+
+define dso_local void @fsub_64r(ptr %loc, double %val) nounwind {
+; X86-NOSSE-LABEL: fsub_64r:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: movl %esp, %ebp
+; X86-NOSSE-NEXT: andl $-8, %esp
+; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: movl 8(%ebp), %eax
+; X86-NOSSE-NEXT: fildll (%eax)
+; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fsubl 12(%ebp)
+; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT: movl %ecx, (%esp)
+; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fildll (%esp)
+; X86-NOSSE-NEXT: fistpll (%eax)
+; X86-NOSSE-NEXT: movl %ebp, %esp
+; X86-NOSSE-NEXT: popl %ebp
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE1-LABEL: fsub_64r:
+; X86-SSE1: # %bb.0:
+; X86-SSE1-NEXT: pushl %ebp
+; X86-SSE1-NEXT: movl %esp, %ebp
+; X86-SSE1-NEXT: andl $-8, %esp
+; X86-SSE1-NEXT: subl $16, %esp
+; X86-SSE1-NEXT: movl 8(%ebp), %eax
+; X86-SSE1-NEXT: xorps %xmm0, %xmm0
+; X86-SSE1-NEXT: xorps %xmm1, %xmm1
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
+; X86-SSE1-NEXT: movss %xmm1, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: fldl (%esp)
+; X86-SSE1-NEXT: fsubl 12(%ebp)
+; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movlps %xmm0, (%eax)
+; X86-SSE1-NEXT: movl %ebp, %esp
+; X86-SSE1-NEXT: popl %ebp
+; X86-SSE1-NEXT: retl
+;
+; X86-SSE2-LABEL: fsub_64r:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: andl $-8, %esp
+; X86-SSE2-NEXT: subl $8, %esp
+; X86-SSE2-NEXT: movl 8(%ebp), %eax
+; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: subsd 12(%ebp), %xmm0
+; X86-SSE2-NEXT: movsd %xmm0, (%esp)
+; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: movlps %xmm0, (%eax)
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX-LABEL: fsub_64r:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: pushl %ebp
+; X86-AVX-NEXT: movl %esp, %ebp
+; X86-AVX-NEXT: andl $-8, %esp
+; X86-AVX-NEXT: subl $8, %esp
+; X86-AVX-NEXT: movl 8(%ebp), %eax
+; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-AVX-NEXT: vsubsd 12(%ebp), %xmm0, %xmm0
+; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
+; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-AVX-NEXT: vmovlps %xmm0, (%eax)
+; X86-AVX-NEXT: movl %ebp, %esp
+; X86-AVX-NEXT: popl %ebp
+; X86-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: fsub_64r:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; X64-SSE-NEXT: subsd %xmm0, %xmm1
+; X64-SSE-NEXT: movsd %xmm1, (%rdi)
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: fsub_64r:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; X64-AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: vmovsd %xmm0, (%rdi)
+; X64-AVX-NEXT: retq
+ %1 = load atomic i64, ptr %loc seq_cst, align 8
+ %2 = bitcast i64 %1 to double
+ %sub = fsub double %2, %val
+ %3 = bitcast double %sub to i64
+ store atomic i64 %3, ptr %loc release, align 8
+ ret void
+}
+
+; Floating-point sub to a global using an immediate.
+define dso_local void @fsub_32g() nounwind {
+; X86-NOSSE-LABEL: fsub_32g:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: subl $8, %esp
+; X86-NOSSE-NEXT: movl glob32, %eax
+; X86-NOSSE-NEXT: movl %eax, (%esp)
+; X86-NOSSE-NEXT: fld1
+; X86-NOSSE-NEXT: fchs
+; X86-NOSSE-NEXT: fadds (%esp)
+; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl %eax, glob32
+; X86-NOSSE-NEXT: addl $8, %esp
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE1-LABEL: fsub_32g:
+; X86-SSE1: # %bb.0:
+; X86-SSE1-NEXT: subl $8, %esp
+; X86-SSE1-NEXT: movl glob32, %eax
+; X86-SSE1-NEXT: movl %eax, (%esp)
+; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT: movl %eax, glob32
+; X86-SSE1-NEXT: addl $8, %esp
+; X86-SSE1-NEXT: retl
+;
+; X86-SSE2-LABEL: fsub_32g:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X86-SSE2-NEXT: addss glob32, %xmm0
+; X86-SSE2-NEXT: movss %xmm0, glob32
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX-LABEL: fsub_32g:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X86-AVX-NEXT: vaddss glob32, %xmm0, %xmm0
+; X86-AVX-NEXT: vmovss %xmm0, glob32
+; X86-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: fsub_32g:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X64-SSE-NEXT: addss glob32(%rip), %xmm0
+; X64-SSE-NEXT: movss %xmm0, glob32(%rip)
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: fsub_32g:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X64-AVX-NEXT: vaddss glob32(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vmovss %xmm0, glob32(%rip)
+; X64-AVX-NEXT: retq
+ %i = load atomic i32, ptr @glob32 monotonic, align 4
+ %f = bitcast i32 %i to float
+ %sub = fsub float %f, 1.000000e+00
+ %s = bitcast float %sub to i32
+ store atomic i32 %s, ptr @glob32 monotonic, align 4
+ ret void
+}
+
+define dso_local void @fsub_64g() nounwind {
+; X86-NOSSE-LABEL: fsub_64g:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: movl %esp, %ebp
+; X86-NOSSE-NEXT: andl $-8, %esp
+; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: fildll glob64
+; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fld1
+; X86-NOSSE-NEXT: fchs
+; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl %eax, (%esp)
+; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fildll (%esp)
+; X86-NOSSE-NEXT: fistpll glob64
+; X86-NOSSE-NEXT: movl %ebp, %esp
+; X86-NOSSE-NEXT: popl %ebp
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE1-LABEL: fsub_64g:
+; X86-SSE1: # %bb.0:
+; X86-SSE1-NEXT: pushl %ebp
+; X86-SSE1-NEXT: movl %esp, %ebp
+; X86-SSE1-NEXT: andl $-8, %esp
+; X86-SSE1-NEXT: subl $16, %esp
+; X86-SSE1-NEXT: xorps %xmm0, %xmm0
+; X86-SSE1-NEXT: xorps %xmm1, %xmm1
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
+; X86-SSE1-NEXT: movss %xmm1, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: fld1
+; X86-SSE1-NEXT: fchs
+; X86-SSE1-NEXT: faddl (%esp)
+; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movlps %xmm0, glob64
+; X86-SSE1-NEXT: movl %ebp, %esp
+; X86-SSE1-NEXT: popl %ebp
+; X86-SSE1-NEXT: retl
+;
+; X86-SSE2-LABEL: fsub_64g:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: andl $-8, %esp
+; X86-SSE2-NEXT: subl $8, %esp
+; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: movsd %xmm0, (%esp)
+; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: movlps %xmm0, glob64
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX-LABEL: fsub_64g:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: pushl %ebp
+; X86-AVX-NEXT: movl %esp, %ebp
+; X86-AVX-NEXT: andl $-8, %esp
+; X86-AVX-NEXT: subl $8, %esp
+; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
+; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-AVX-NEXT: vmovlps %xmm0, glob64
+; X86-AVX-NEXT: movl %ebp, %esp
+; X86-AVX-NEXT: popl %ebp
+; X86-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: fsub_64g:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0]
+; X64-SSE-NEXT: addsd glob64(%rip), %xmm0
+; X64-SSE-NEXT: movsd %xmm0, glob64(%rip)
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: fsub_64g:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0]
+; X64-AVX-NEXT: vaddsd glob64(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vmovsd %xmm0, glob64(%rip)
+; X64-AVX-NEXT: retq
+ %i = load atomic i64, ptr @glob64 monotonic, align 8
+ %f = bitcast i64 %i to double
+ %sub = fsub double %f, 1.000000e+00
+ %s = bitcast double %sub to i64
+ store atomic i64 %s, ptr @glob64 monotonic, align 8
+ ret void
+}
+
+; Floating-point sub to a hard-coded immediate location using an immediate.
+define dso_local void @fsub_32imm() nounwind {
+; X86-NOSSE-LABEL: fsub_32imm:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: subl $8, %esp
+; X86-NOSSE-NEXT: movl -559038737, %eax
+; X86-NOSSE-NEXT: movl %eax, (%esp)
+; X86-NOSSE-NEXT: fld1
+; X86-NOSSE-NEXT: fchs
+; X86-NOSSE-NEXT: fadds (%esp)
+; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl %eax, -559038737
+; X86-NOSSE-NEXT: addl $8, %esp
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE1-LABEL: fsub_32imm:
+; X86-SSE1: # %bb.0:
+; X86-SSE1-NEXT: subl $8, %esp
+; X86-SSE1-NEXT: movl -559038737, %eax
+; X86-SSE1-NEXT: movl %eax, (%esp)
+; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT: movl %eax, -559038737
+; X86-SSE1-NEXT: addl $8, %esp
+; X86-SSE1-NEXT: retl
+;
+; X86-SSE2-LABEL: fsub_32imm:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X86-SSE2-NEXT: addss -559038737, %xmm0
+; X86-SSE2-NEXT: movss %xmm0, -559038737
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX-LABEL: fsub_32imm:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X86-AVX-NEXT: vaddss -559038737, %xmm0, %xmm0
+; X86-AVX-NEXT: vmovss %xmm0, -559038737
+; X86-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: fsub_32imm:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
+; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X64-SSE-NEXT: addss (%rax), %xmm0
+; X64-SSE-NEXT: movss %xmm0, (%rax)
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: fsub_32imm:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
+; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X64-AVX-NEXT: vaddss (%rax), %xmm0, %xmm0
+; X64-AVX-NEXT: vmovss %xmm0, (%rax)
+; X64-AVX-NEXT: retq
+ %i = load atomic i32, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
+ %f = bitcast i32 %i to float
+ %sub = fsub float %f, 1.000000e+00
+ %s = bitcast float %sub to i32
+ store atomic i32 %s, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
+ ret void
+}
+
+define dso_local void @fsub_64imm() nounwind {
+; X86-NOSSE-LABEL: fsub_64imm:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: movl %esp, %ebp
+; X86-NOSSE-NEXT: andl $-8, %esp
+; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: fildll -559038737
+; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fld1
+; X86-NOSSE-NEXT: fchs
+; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl %eax, (%esp)
+; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fildll (%esp)
+; X86-NOSSE-NEXT: fistpll -559038737
+; X86-NOSSE-NEXT: movl %ebp, %esp
+; X86-NOSSE-NEXT: popl %ebp
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE1-LABEL: fsub_64imm:
+; X86-SSE1: # %bb.0:
+; X86-SSE1-NEXT: pushl %ebp
+; X86-SSE1-NEXT: movl %esp, %ebp
+; X86-SSE1-NEXT: andl $-8, %esp
+; X86-SSE1-NEXT: subl $16, %esp
+; X86-SSE1-NEXT: xorps %xmm0, %xmm0
+; X86-SSE1-NEXT: xorps %xmm1, %xmm1
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
+; X86-SSE1-NEXT: movss %xmm1, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
+; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: fld1
+; X86-SSE1-NEXT: fchs
+; X86-SSE1-NEXT: faddl (%esp)
+; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movlps %xmm0, -559038737
+; X86-SSE1-NEXT: movl %ebp, %esp
+; X86-SSE1-NEXT: popl %ebp
+; X86-SSE1-NEXT: retl
+;
+; X86-SSE2-LABEL: fsub_64imm:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: andl $-8, %esp
+; X86-SSE2-NEXT: subl $8, %esp
+; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: movsd %xmm0, (%esp)
+; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: movlps %xmm0, -559038737
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX-LABEL: fsub_64imm:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: pushl %ebp
+; X86-AVX-NEXT: movl %esp, %ebp
+; X86-AVX-NEXT: andl $-8, %esp
+; X86-AVX-NEXT: subl $8, %esp
+; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
+; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-AVX-NEXT: vmovlps %xmm0, -559038737
+; X86-AVX-NEXT: movl %ebp, %esp
+; X86-AVX-NEXT: popl %ebp
+; X86-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: fsub_64imm:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
+; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0]
+; X64-SSE-NEXT: addsd (%rax), %xmm0
+; X64-SSE-NEXT: movsd %xmm0, (%rax)
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: fsub_64imm:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
+; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0]
+; X64-AVX-NEXT: vaddsd (%rax), %xmm0, %xmm0
+; X64-AVX-NEXT: vmovsd %xmm0, (%rax)
+; X64-AVX-NEXT: retq
+ %i = load atomic i64, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
+ %f = bitcast i64 %i to double
+ %sub = fsub double %f, 1.000000e+00
+ %s = bitcast double %sub to i64
+ store atomic i64 %s, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
+ ret void
+}
+
+; Floating-point sub to a stack location.
+define dso_local void @fsub_32stack() nounwind {
+; X86-NOSSE-LABEL: fsub_32stack:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: subl $12, %esp
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl %eax, (%esp)
+; X86-NOSSE-NEXT: fld1
+; X86-NOSSE-NEXT: fsubs (%esp)
+; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE1-LABEL: fsub_32stack:
+; X86-SSE1: # %bb.0:
+; X86-SSE1-NEXT: subl $12, %esp
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT: movl %eax, (%esp)
+; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X86-SSE1-NEXT: subss (%esp), %xmm0
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: addl $12, %...
[truncated]
|
|
2dc6612
to
d046915
Compare
@phoebewang What do you think about this? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@phoebewang What do you think about this?
I don't know the background; I have just looked at two patches, 8662083 and 586fad5.
My guess is that it was a bit complex for the first patch to support them. The second one simplified it and should be extendable to the others. I don't see a problem here, but maybe @topperc should take a look.
f40dee0
to
b8e135e
Compare
@topperc thoughts? |
760e4d5
to
53d7d75
Compare
Since we can bitcast and then do the same thing sub does in the table section above, I figured it was trivial to add fsub, fmul, and fdiv.
@phoebewang Thoughts on this? |
Why do you keep force pushing this patch? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
@topperc can we please merge this |
Since we can bitcast and then do the same thing sub does in the table section above, I figured it was trivial to add fsub, fmul, and fdiv. Signed-off-by: Hafidz Muzakky <[email protected]>
Since we can bitcast and then do the same thing sub does in the table section above, I figured it was trivial to add fsub, fmul, and fdiv.