LegalizeVectorTypes: fix bug in widening of vec result in xrint #71198
Conversation
Fix a bug introduced in 98c90a1 (ISel: introduce vector ISD::LRINT, ISD::LLRINT; custom RISCV lowering), where ISD::LRINT and ISD::LLRINT used WidenVecRes_Unary to widen the vector result. This leads to incorrect CodeGen for RISC-V fixed-vectors of length 3, and a crash in SelectionDAG when we try to lower llvm.lrint.vxi32.vxf64 on i686. Fix the bug by implementing a correct WidenVecRes_XRINT. Fixes llvm#71187.
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-llvm-selectiondag
Author: Ramkumar Ramachandra (artagnon)
Changes: as in the description above.
Patch is 25.83 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/71198.diff
5 Files Affected:
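For reference, a minimal sketch of the length-3 case described above, distilled from the lrint_v3f32 test updated by this patch (the llc invocation is illustrative; the checked-in tests drive iXLen via sed and several RUN lines):

; Illustrative: llc -mtriple=riscv64 -mattr=+v < reproducer.ll
; The <3 x i64> result and <3 x float> source are both widened during type
; legalization; before this patch the node went through WidenVecRes_Unary.
define <3 x i64> @lrint_v3f32(<3 x float> %x) {
  %a = call <3 x i64> @llvm.lrint.v3i64.v3f32(<3 x float> %x)
  ret <3 x i64> %a
}
declare <3 x i64> @llvm.lrint.v3i64.v3f32(<3 x float>)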
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index c48d6c4adf61517..f85c1296cdce856 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -987,6 +987,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecRes_Convert(SDNode *N);
SDValue WidenVecRes_Convert_StrictFP(SDNode *N);
SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N);
+ SDValue WidenVecRes_XRINT(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_IS_FPCLASS(SDNode *N);
SDValue WidenVecRes_ExpOp(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 02d9ce4f0a44d4b..a1a9f0f0615cbc7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4204,6 +4204,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_FP_TO_XINT_SAT(N);
break;
+ case ISD::LRINT:
+ case ISD::LLRINT:
+ Res = WidenVecRes_XRINT(N);
+ break;
+
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
@@ -4216,8 +4221,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FLOG2:
case ISD::FNEARBYINT:
case ISD::FRINT:
- case ISD::LRINT:
- case ISD::LLRINT:
case ISD::FROUND:
case ISD::FROUNDEVEN:
case ISD::FSIN:
@@ -4791,6 +4794,27 @@ SDValue DAGTypeLegalizer::WidenVecRes_FP_TO_XINT_SAT(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, N->getOperand(1));
}
+SDValue DAGTypeLegalizer::WidenVecRes_XRINT(SDNode *N) {
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ ElementCount WidenNumElts = WidenVT.getVectorElementCount();
+
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // Also widen the input.
+ if (getTypeAction(SrcVT) == TargetLowering::TypeWidenVector) {
+ Src = GetWidenedVector(Src);
+ SrcVT = Src.getValueType();
+ }
+
+ // Input and output not widened to the same size, give up.
+ if (WidenNumElts != SrcVT.getVectorElementCount())
+ return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue());
+
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, Src);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
SDValue InOp = N->getOperand(1);
SDLoc DL(N);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
index ff83e7c8c32ae9b..5d34cd6592702e2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
@@ -150,7 +150,22 @@ define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x) {
; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
index cd4eec44920c951..7cb864546cebcb9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
@@ -111,11 +111,14 @@ define <3 x iXLen> @lrint_v3f32(<3 x float> %x) {
; RV64-i32-NEXT: vfmv.f.s fa5, v10
; RV64-i32-NEXT: fcvt.l.s a0, fa5
; RV64-i32-NEXT: vslide1down.vx v9, v9, a0
-; RV64-i32-NEXT: vslidedown.vi v8, v8, 2
+; RV64-i32-NEXT: vslidedown.vi v10, v8, 2
+; RV64-i32-NEXT: vfmv.f.s fa5, v10
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vslide1down.vx v9, v9, a0
+; RV64-i32-NEXT: vslidedown.vi v8, v8, 3
; RV64-i32-NEXT: vfmv.f.s fa5, v8
; RV64-i32-NEXT: fcvt.l.s a0, fa5
; RV64-i32-NEXT: vslide1down.vx v8, v9, a0
-; RV64-i32-NEXT: vslidedown.vi v8, v8, 1
; RV64-i32-NEXT: ret
;
; RV64-i64-LABEL: lrint_v3f32:
diff --git a/llvm/test/CodeGen/X86/vector-lrint.ll b/llvm/test/CodeGen/X86/vector-lrint.ll
index 7373cd32df98d49..43d12fdeffa9a91 100644
--- a/llvm/test/CodeGen/X86/vector-lrint.ll
+++ b/llvm/test/CodeGen/X86/vector-lrint.ll
@@ -2,12 +2,10 @@
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=X86-SSE2
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefix=X86-AVX
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefix=X86-AVX
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64-SSE
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefix=X64-AVX
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64-AVX
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64-SSE
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefix=X64-AVX
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64-AVX
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefix=X64-AVX-i32
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64-AVX-i32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefix=X64-AVX-i64
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64-AVX-i64
define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
; X86-SSE2-LABEL: lrint_v1f32:
@@ -19,6 +17,16 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vcvtss2si {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: retl
+;
+; X64-AVX-i32-LABEL: lrint_v1f32:
+; X64-AVX-i32: # %bb.0:
+; X64-AVX-i32-NEXT: vcvtss2si %xmm0, %eax
+; X64-AVX-i32-NEXT: retq
+;
+; X64-AVX-i64-LABEL: lrint_v1f32:
+; X64-AVX-i64: # %bb.0:
+; X64-AVX-i64-NEXT: vcvtss2si %xmm0, %rax
+; X64-AVX-i64-NEXT: retq
%a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x)
ret <1 x iXLen> %a
}
@@ -60,6 +68,31 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
; X86-AVX-NEXT: vcvtss2si %xmm0, %eax
; X86-AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
; X86-AVX-NEXT: retl
+;
+; X64-AVX-i32-LABEL: lrint_v2f32:
+; X64-AVX-i32: # %bb.0:
+; X64-AVX-i32-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; X64-AVX-i32-NEXT: vcvtss2si %xmm1, %eax
+; X64-AVX-i32-NEXT: vcvtss2si %xmm0, %ecx
+; X64-AVX-i32-NEXT: vmovd %ecx, %xmm1
+; X64-AVX-i32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; X64-AVX-i32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
+; X64-AVX-i32-NEXT: vcvtss2si %xmm2, %eax
+; X64-AVX-i32-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
+; X64-AVX-i32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; X64-AVX-i32-NEXT: vcvtss2si %xmm0, %eax
+; X64-AVX-i32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
+; X64-AVX-i32-NEXT: retq
+;
+; X64-AVX-i64-LABEL: lrint_v2f32:
+; X64-AVX-i64: # %bb.0:
+; X64-AVX-i64-NEXT: vcvtss2si %xmm0, %rax
+; X64-AVX-i64-NEXT: vmovq %rax, %xmm1
+; X64-AVX-i64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X64-AVX-i64-NEXT: vcvtss2si %xmm0, %rax
+; X64-AVX-i64-NEXT: vmovq %rax, %xmm0
+; X64-AVX-i64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X64-AVX-i64-NEXT: retq
%a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x)
ret <2 x iXLen> %a
}
@@ -101,6 +134,21 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
; X86-AVX-NEXT: vcvtss2si %xmm0, %eax
; X86-AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
; X86-AVX-NEXT: retl
+;
+; X64-AVX-i32-LABEL: lrint_v4f32:
+; X64-AVX-i32: # %bb.0:
+; X64-AVX-i32-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; X64-AVX-i32-NEXT: vcvtss2si %xmm1, %eax
+; X64-AVX-i32-NEXT: vcvtss2si %xmm0, %ecx
+; X64-AVX-i32-NEXT: vmovd %ecx, %xmm1
+; X64-AVX-i32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; X64-AVX-i32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
+; X64-AVX-i32-NEXT: vcvtss2si %xmm2, %eax
+; X64-AVX-i32-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
+; X64-AVX-i32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; X64-AVX-i32-NEXT: vcvtss2si %xmm0, %eax
+; X64-AVX-i32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
+; X64-AVX-i32-NEXT: retq
%a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x)
ret <4 x iXLen> %a
}
@@ -154,193 +202,131 @@ define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x) {
}
declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)
-define <1 x i64> @lrint_v1f64(<1 x double> %x) {
+define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
; X86-SSE2-LABEL: lrint_v1f64:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pushl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT: .cfi_offset %ebp, -8
-; X86-SSE2-NEXT: movl %esp, %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
-; X86-SSE2-NEXT: andl $-8, %esp
-; X86-SSE2-NEXT: subl $8, %esp
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movsd %xmm0, (%esp)
-; X86-SSE2-NEXT: fldl (%esp)
-; X86-SSE2-NEXT: fistpll (%esp)
-; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movd %xmm0, %eax
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X86-SSE2-NEXT: movd %xmm0, %edx
-; X86-SSE2-NEXT: movl %ebp, %esp
-; X86-SSE2-NEXT: popl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT: cvtsd2si {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: lrint_v1f64:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: pushl %ebp
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %ebp, -8
-; X86-AVX-NEXT: movl %esp, %ebp
-; X86-AVX-NEXT: .cfi_def_cfa_register %ebp
-; X86-AVX-NEXT: andl $-8, %esp
-; X86-AVX-NEXT: subl $8, %esp
-; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT: fldl (%esp)
-; X86-AVX-NEXT: fistpll (%esp)
-; X86-AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT: vmovd %xmm0, %eax
-; X86-AVX-NEXT: vpextrd $1, %xmm0, %edx
-; X86-AVX-NEXT: movl %ebp, %esp
-; X86-AVX-NEXT: popl %ebp
-; X86-AVX-NEXT: .cfi_def_cfa %esp, 4
+; X86-AVX-NEXT: vcvtsd2si {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: retl
;
-; X64-SSE-LABEL: lrint_v1f64:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: cvtsd2si %xmm0, %rax
-; X64-SSE-NEXT: retq
+; X64-AVX-i32-LABEL: lrint_v1f64:
+; X64-AVX-i32: # %bb.0:
+; X64-AVX-i32-NEXT: vcvtsd2si %xmm0, %eax
+; X64-AVX-i32-NEXT: retq
;
-; X64-AVX-LABEL: lrint_v1f64:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vcvtsd2si %xmm0, %rax
-; X64-AVX-NEXT: retq
- %a = call <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double> %x)
- ret <1 x i64> %a
+; X64-AVX-i64-LABEL: lrint_v1f64:
+; X64-AVX-i64: # %bb.0:
+; X64-AVX-i64-NEXT: vcvtsd2si %xmm0, %rax
+; X64-AVX-i64-NEXT: retq
+ %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x)
+ ret <1 x iXLen> %a
}
-declare <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double>)
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
-define <2 x i64> @lrint_v2f64(<2 x double> %x) {
+define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
; X86-SSE2-LABEL: lrint_v2f64:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pushl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT: .cfi_offset %ebp, -8
-; X86-SSE2-NEXT: movl %esp, %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
-; X86-SSE2-NEXT: andl $-8, %esp
-; X86-SSE2-NEXT: subl $16, %esp
-; X86-SSE2-NEXT: movhps %xmm0, (%esp)
-; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT: fldl (%esp)
-; X86-SSE2-NEXT: fistpll (%esp)
-; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; X86-SSE2-NEXT: movl %ebp, %esp
-; X86-SSE2-NEXT: popl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
+; X86-SSE2-NEXT: movd %eax, %xmm1
+; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
+; X86-SSE2-NEXT: movd %eax, %xmm0
+; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: lrint_v2f64:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: pushl %ebp
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %ebp, -8
-; X86-AVX-NEXT: movl %esp, %ebp
-; X86-AVX-NEXT: .cfi_def_cfa_register %ebp
-; X86-AVX-NEXT: andl $-8, %esp
-; X86-AVX-NEXT: subl $16, %esp
-; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
-; X86-AVX-NEXT: vmovhps %xmm0, (%esp)
-; X86-AVX-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-AVX-NEXT: fistpll {{[0-9]+}}(%esp)
-; X86-AVX-NEXT: fldl (%esp)
-; X86-AVX-NEXT: fistpll (%esp)
-; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-AVX-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; X86-AVX-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
-; X86-AVX-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; X86-AVX-NEXT: movl %ebp, %esp
-; X86-AVX-NEXT: popl %ebp
-; X86-AVX-NEXT: .cfi_def_cfa %esp, 4
+; X86-AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
+; X86-AVX-NEXT: vcvtsd2si %xmm1, %eax
+; X86-AVX-NEXT: vcvtsd2si %xmm0, %ecx
+; X86-AVX-NEXT: vmovd %ecx, %xmm0
+; X86-AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
-; X64-SSE-LABEL: lrint_v2f64:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: cvtsd2si %xmm0, %rax
-; X64-SSE-NEXT: movq %rax, %xmm1
-; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; X64-SSE-NEXT: cvtsd2si %xmm0, %rax
-; X64-SSE-NEXT: movq %rax, %xmm0
-; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; X64-SSE-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE-NEXT: retq
+; X64-AVX-i32-LABEL: lrint_v2f64:
+; X64-AVX-i32: # %bb.0:
+; X64-AVX-i32-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
+; X64-AVX-i32-NEXT: vcvtsd2si %xmm1, %eax
+; X64-AVX-i32-NEXT: vcvtsd2si %xmm0, %ecx
+; X64-AVX-i32-NEXT: vmovd %ecx, %xmm0
+; X64-AVX-i32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; X64-AVX-i32-NEXT: retq
;
-; X64-AVX-LABEL: lrint_v2f64:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vcvtsd2si %xmm0, %rax
-; X64-AVX-NEXT: vmovq %rax, %xmm1
-; X64-AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
-; X64-AVX-NEXT: vcvtsd2si %xmm0, %rax
-; X64-AVX-NEXT: vmovq %rax, %xmm0
-; X64-AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; X64-AVX-NEXT: retq
- %a = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> %x)
- ret <2 x i64> %a
+; X64-AVX-i64-LABEL: lrint_v2f64:
+; X64-AVX-i64: # %bb.0:
+; X64-AVX-i64-NEXT: vcvtsd2si %xmm0, %rax
+; X64-AVX-i64-NEXT: vmovq %rax, %xmm1
+; X64-AVX-i64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X64-AVX-i64-NEXT: vcvtsd2si %xmm0, %rax
+; X64-AVX-i64-NEXT: vmovq %rax, %xmm0
+; X64-AVX-i64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X64-AVX-i64-NEXT: retq
+ %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double> %x)
+ ret <2 x iXLen> %a
}
-declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>)
+declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
-define <4 x i64> @lrint_v4f64(<4 x double> %x) {
+define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
; X86-SSE2-LABEL: lrint_v4f64:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pushl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT: .cfi_offset %ebp, -8
-; X86-SSE2-NEXT: movl %esp, %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
-; X86-SSE2-NEXT: andl $-8, %esp
-; X86-SSE2-NEXT: subl $32, %esp
-; X86-SSE2-NEXT: movhps %xmm0, {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT: movhps %xmm1, (%esp)
-; X86-SSE2-NEXT: movlps %xmm1, {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT: fldl (%esp)
-; X86-SSE2-NEXT: fistpll (%esp)
-; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; X86-SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; X86-SSE2-NEXT: movl %ebp, %esp
-; X86-SSE2-NEXT: popl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax
+; X86-SSE2-NEXT: movd %eax, %xmm2
+; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
+; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax
+; X86-SSE2-NEXT: movd %eax, %xmm1
+; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
+; X86-SSE2-NEXT: movd %eax, %xmm1
+; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
+; X86-SSE2-NEXT: movd %eax, %xmm0
+; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: retl
;
-; X64-SSE-LABEL: lrint_v4f64:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: cvtsd2si %xmm0, %rax
-; X64-SSE-NEXT: movq %rax, %xmm2
-; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; X64-SSE-NEXT: cvtsd2si %xmm0, %rax
-; X64-SSE-NEXT: movq %rax, %xmm0
-; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
-; X64-SSE-NEXT: cvtsd2si %xmm1, %rax
-; X64-SSE-NEXT: movq %rax, %xmm3
-; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
-; X64-SSE-NEXT: cvtsd2si %xmm1, %rax
-; X64-SSE-NEXT: movq %rax, %xmm0
-; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
-; X64-SSE-NEXT: movdqa %xmm2, %xmm0
-; X64-SSE-NEXT: movdqa %xmm3, %xmm1
-; X64-SSE-NEXT: retq
- %a = call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> %x)
- ret <4 x i64> %a
+; X86-AVX-LABEL: lrint_v4f64:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
+; X86-AVX-NEXT: vcvtsd2si %xmm1, %eax
+; X86-AVX-NEXT: vcvtsd2si %xmm0, %ecx
+; X86-AVX-NEXT: vmovd %ecx, %xmm1
+; X86-AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; X86-AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; X86-AVX-NEXT: vcvtsd2si %xmm0, %eax
+; X86-AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
+; X86-AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X86-AVX-NEXT: vcvtsd2si %xmm0, %eax
+; X86-AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
+; X86-AVX-NEXT: vzeroupper
+; X86-AVX-NEXT: retl
+;
+; X64-AVX-i32-LABEL: lrint_v4f64:
+; X64-AVX-i32: # %bb.0:
+; X64-AVX-i32-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
+; X64-AVX-i32-NEXT: vcvtsd2si %xmm1, %eax
+; X64-AVX-i32-NEXT: vcvtsd2si %xmm0, %ecx
+; X64-AVX-i32-NEXT: vmovd %ecx, %xmm1
+; X64-AVX-i32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; X64-AVX-i32-NEXT: vextractf128 $1, %ymm0, %xmm0
+; X64-AVX-i32-NEXT: vcvtsd2si %xmm0, %eax
+...
[truncated]
Figured it out! There were conflicts between avx and avx512f: the only problem with UTC is that it wasn't reporting conflicts! Fixed now.
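One reading of that, based on the RUN-line changes in vector-lrint.ll above: several RUN lines shared a single FileCheck prefix while no longer producing identical assembly, so the update script had no common check lines to emit. The pre-patch lines looked like this (quoted from the diff; the summary comment is my interpretation, not output of the tool):

; All four configurations map to the X64-AVX prefix, but the i32 and i64
; substitutions emit different conversions (vcvtss2si into %eax vs. %rax),
; so no shared check body exists; the patch splits them into X64-AVX-i32
; and X64-AVX-i64.
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefix=X64-AVX
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64-AVX
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefix=X64-AVX
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64-AVX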
LGTM - cheers