Skip to content

Commit 4a32c48

Browse files
committed
[X86] LowerTRUNCATE - ensure we handle cases where we truncate to a sub-128bit type (PR66194)
Fixes #66194
1 parent a685715 commit 4a32c48

File tree

2 files changed: 195 additions and 1 deletion

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20369,7 +20369,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
2036920369

2037020370
// If we're called by the type legalizer, handle a few cases.
2037120371
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20372-
if (!TLI.isTypeLegal(InVT)) {
20372+
if (!TLI.isTypeLegal(VT) || !TLI.isTypeLegal(InVT)) {
2037320373
if ((InVT == MVT::v8i64 || InVT == MVT::v16i32 || InVT == MVT::v16i64) &&
2037420374
VT.is128BitVector() && Subtarget.hasAVX512()) {
2037520375
assert((InVT == MVT::v16i64 || Subtarget.hasVLX()) &&

llvm/test/CodeGen/X86/vector-trunc.ll

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1948,6 +1948,200 @@ define void @PR34773(ptr %a0, ptr %a1) {
19481948
ret void
19491949
}
19501950

1951+
define i16 @PR66194(i8 %q) {
1952+
; SSE2-LABEL: PR66194:
1953+
; SSE2: # %bb.0: # %entry
1954+
; SSE2-NEXT: xorl %eax, %eax
1955+
; SSE2-NEXT: xorl %ecx, %ecx
1956+
; SSE2-NEXT: testb %dil, %dil
1957+
; SSE2-NEXT: setne %al
1958+
; SSE2-NEXT: sete %cl
1959+
; SSE2-NEXT: movl %ecx, %edx
1960+
; SSE2-NEXT: shll $16, %edx
1961+
; SSE2-NEXT: orl %eax, %edx
1962+
; SSE2-NEXT: movd %edx, %xmm0
1963+
; SSE2-NEXT: pinsrw $2, %eax, %xmm0
1964+
; SSE2-NEXT: pinsrw $3, %eax, %xmm0
1965+
; SSE2-NEXT: pinsrw $4, %ecx, %xmm0
1966+
; SSE2-NEXT: pinsrw $5, %eax, %xmm0
1967+
; SSE2-NEXT: pinsrw $6, %eax, %xmm0
1968+
; SSE2-NEXT: pinsrw $7, %ecx, %xmm0
1969+
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
1970+
; SSE2-NEXT: psubw %xmm1, %xmm0
1971+
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1972+
; SSE2-NEXT: packuswb %xmm0, %xmm0
1973+
; SSE2-NEXT: pxor %xmm1, %xmm1
1974+
; SSE2-NEXT: psadbw %xmm0, %xmm1
1975+
; SSE2-NEXT: movd %xmm1, %eax
1976+
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
1977+
; SSE2-NEXT: retq
1978+
;
1979+
; SSSE3-LABEL: PR66194:
1980+
; SSSE3: # %bb.0: # %entry
1981+
; SSSE3-NEXT: xorl %eax, %eax
1982+
; SSSE3-NEXT: xorl %ecx, %ecx
1983+
; SSSE3-NEXT: testb %dil, %dil
1984+
; SSSE3-NEXT: setne %al
1985+
; SSSE3-NEXT: sete %cl
1986+
; SSSE3-NEXT: movl %ecx, %edx
1987+
; SSSE3-NEXT: shll $16, %edx
1988+
; SSSE3-NEXT: orl %eax, %edx
1989+
; SSSE3-NEXT: movd %edx, %xmm0
1990+
; SSSE3-NEXT: pinsrw $2, %eax, %xmm0
1991+
; SSSE3-NEXT: pinsrw $3, %eax, %xmm0
1992+
; SSSE3-NEXT: pinsrw $4, %ecx, %xmm0
1993+
; SSSE3-NEXT: pinsrw $5, %eax, %xmm0
1994+
; SSSE3-NEXT: pinsrw $6, %eax, %xmm0
1995+
; SSSE3-NEXT: pinsrw $7, %ecx, %xmm0
1996+
; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
1997+
; SSSE3-NEXT: psubw %xmm1, %xmm0
1998+
; SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
1999+
; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
2000+
; SSSE3-NEXT: shll $8, %eax
2001+
; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
2002+
; SSSE3-NEXT: orl %eax, %ecx
2003+
; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
2004+
; SSSE3-NEXT: shll $8, %eax
2005+
; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
2006+
; SSSE3-NEXT: orl %eax, %edx
2007+
; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
2008+
; SSSE3-NEXT: shll $16, %eax
2009+
; SSSE3-NEXT: orl %edx, %eax
2010+
; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
2011+
; SSSE3-NEXT: shll $24, %edx
2012+
; SSSE3-NEXT: orl %eax, %edx
2013+
; SSSE3-NEXT: movd %edx, %xmm0
2014+
; SSSE3-NEXT: pinsrw $2, %ecx, %xmm0
2015+
; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
2016+
; SSSE3-NEXT: shll $8, %eax
2017+
; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
2018+
; SSSE3-NEXT: orl %eax, %ecx
2019+
; SSSE3-NEXT: pinsrw $3, %ecx, %xmm0
2020+
; SSSE3-NEXT: pxor %xmm1, %xmm1
2021+
; SSSE3-NEXT: psadbw %xmm0, %xmm1
2022+
; SSSE3-NEXT: movd %xmm1, %eax
2023+
; SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
2024+
; SSSE3-NEXT: retq
2025+
;
2026+
; SSE41-LABEL: PR66194:
2027+
; SSE41: # %bb.0: # %entry
2028+
; SSE41-NEXT: xorl %eax, %eax
2029+
; SSE41-NEXT: xorl %ecx, %ecx
2030+
; SSE41-NEXT: testb %dil, %dil
2031+
; SSE41-NEXT: setne %al
2032+
; SSE41-NEXT: sete %cl
2033+
; SSE41-NEXT: movd %eax, %xmm0
2034+
; SSE41-NEXT: pinsrb $2, %ecx, %xmm0
2035+
; SSE41-NEXT: pinsrb $4, %eax, %xmm0
2036+
; SSE41-NEXT: pinsrb $6, %eax, %xmm0
2037+
; SSE41-NEXT: pinsrb $8, %ecx, %xmm0
2038+
; SSE41-NEXT: pinsrb $10, %eax, %xmm0
2039+
; SSE41-NEXT: pinsrb $12, %eax, %xmm0
2040+
; SSE41-NEXT: pinsrb $14, %ecx, %xmm0
2041+
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
2042+
; SSE41-NEXT: psubw %xmm1, %xmm0
2043+
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
2044+
; SSE41-NEXT: pxor %xmm1, %xmm1
2045+
; SSE41-NEXT: psadbw %xmm0, %xmm1
2046+
; SSE41-NEXT: movd %xmm1, %eax
2047+
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
2048+
; SSE41-NEXT: retq
2049+
;
2050+
; AVX1-LABEL: PR66194:
2051+
; AVX1: # %bb.0: # %entry
2052+
; AVX1-NEXT: xorl %eax, %eax
2053+
; AVX1-NEXT: testb %dil, %dil
2054+
; AVX1-NEXT: setne %al
2055+
; AVX1-NEXT: sete %cl
2056+
; AVX1-NEXT: vmovd %eax, %xmm0
2057+
; AVX1-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
2058+
; AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
2059+
; AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
2060+
; AVX1-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
2061+
; AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
2062+
; AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
2063+
; AVX1-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
2064+
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
2065+
; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0
2066+
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
2067+
; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0
2068+
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
2069+
; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0
2070+
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
2071+
; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0
2072+
; AVX1-NEXT: vmovd %xmm0, %eax
2073+
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
2074+
; AVX1-NEXT: retq
2075+
;
2076+
; AVX2-LABEL: PR66194:
2077+
; AVX2: # %bb.0: # %entry
2078+
; AVX2-NEXT: xorl %eax, %eax
2079+
; AVX2-NEXT: xorl %ecx, %ecx
2080+
; AVX2-NEXT: testb %dil, %dil
2081+
; AVX2-NEXT: setne %al
2082+
; AVX2-NEXT: sete %cl
2083+
; AVX2-NEXT: vmovd %eax, %xmm0
2084+
; AVX2-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0
2085+
; AVX2-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
2086+
; AVX2-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
2087+
; AVX2-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
2088+
; AVX2-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
2089+
; AVX2-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
2090+
; AVX2-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
2091+
; AVX2-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2092+
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
2093+
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
2094+
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
2095+
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
2096+
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
2097+
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
2098+
; AVX2-NEXT: vmovd %xmm0, %eax
2099+
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
2100+
; AVX2-NEXT: retq
2101+
;
2102+
; AVX512-LABEL: PR66194:
2103+
; AVX512: # %bb.0: # %entry
2104+
; AVX512-NEXT: xorl %eax, %eax
2105+
; AVX512-NEXT: xorl %ecx, %ecx
2106+
; AVX512-NEXT: testb %dil, %dil
2107+
; AVX512-NEXT: setne %al
2108+
; AVX512-NEXT: sete %cl
2109+
; AVX512-NEXT: vmovd %eax, %xmm0
2110+
; AVX512-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0
2111+
; AVX512-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
2112+
; AVX512-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
2113+
; AVX512-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
2114+
; AVX512-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
2115+
; AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
2116+
; AVX512-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
2117+
; AVX512-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2118+
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
2119+
; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
2120+
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
2121+
; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
2122+
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
2123+
; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
2124+
; AVX512-NEXT: vmovd %xmm0, %eax
2125+
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
2126+
; AVX512-NEXT: retq
2127+
entry:
2128+
%cmp12.i.13 = icmp ne i8 %q, 0
2129+
%cond.i15.13 = zext i1 %cmp12.i.13 to i16
2130+
%tobool.not.i.13 = icmp eq i8 %q, 0
2131+
%cond18.i.13 = zext i1 %tobool.not.i.13 to i16
2132+
%0 = insertelement <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, i16 %cond.i15.13, i64 8
2133+
%1 = insertelement <16 x i16> %0, i16 %cond18.i.13, i64 9
2134+
%2 = insertelement <16 x i16> %1, i16 %cond.i15.13, i64 10
2135+
%3 = insertelement <16 x i16> %2, i16 %cond.i15.13, i64 11
2136+
%4 = insertelement <16 x i16> %3, i16 %cond18.i.13, i64 12
2137+
%5 = insertelement <16 x i16> %4, i16 %cond.i15.13, i64 13
2138+
%6 = insertelement <16 x i16> %5, i16 %cond.i15.13, i64 14
2139+
%7 = insertelement <16 x i16> %6, i16 %cond18.i.13, i64 15
2140+
%8 = tail call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %7)
2141+
ret i16 %8
2142+
}
2143+
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
2144+
19512145
; Store merging must not infinitely fight store splitting.
19522146

19532147
define void @store_merge_split(<8 x i32> %w1, <8 x i32> %w2, i64 %idx, ptr %p) align 2 {

0 commit comments

Comments
 (0)