Skip to content

Commit 078b60d

Browse files
committed
[X86] Only fold to v16i32 VPDPWSSD on targets with useAVX512Regs enabled.
Fixes #119158
1 parent 1004496 commit 078b60d

File tree

2 files changed

+41
-1
lines changed

2 files changed

+41
-1
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56911,7 +56911,7 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
5691156911

5691256912
// Peephole for 512-bit VPDPWSSD on non-VLX targets.
5691356913
// TODO: Should this be part of matchPMADDWD/matchPMADDWD_2?
56914-
if (Subtarget.hasVNNI() && VT == MVT::v16i32) {
56914+
if (Subtarget.hasVNNI() && Subtarget.useAVX512Regs() && VT == MVT::v16i32) {
5691556915
using namespace SDPatternMatch;
5691656916
SDValue Accum, Lo0, Lo1, Hi0, Hi1;
5691756917
if (sd_match(N, m_Add(m_Value(Accum),

llvm/test/CodeGen/X86/pr119158.ll

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
3+
4+
define dso_local void @foo() #1 {
5+
; CHECK-LABEL: foo:
6+
; CHECK: # %bb.0: # %newFuncRoot
7+
; CHECK-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
8+
; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm1 = [64,64,64,64,64,64,64,64]
9+
; CHECK-NEXT: vpdpwssd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm1
10+
; CHECK-NEXT: vpsrld $7, %ymm1, %ymm0
11+
; CHECK-NEXT: vpackusdw %ymm0, %ymm0, %ymm0
12+
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
13+
; CHECK-NEXT: vmovdqu %ymm0, (%rax)
14+
; CHECK-NEXT: vzeroupper
15+
; CHECK-NEXT: retq
16+
newFuncRoot:
17+
br label %loop
18+
19+
loop: ; preds = %newFuncRoot, %loop
20+
%0 = load <16 x i8>, ptr poison, align 1
21+
%1 = zext <16 x i8> %0 to <16 x i32>
22+
%2 = mul nuw nsw <16 x i32> %1, splat (i32 18)
23+
%3 = add nuw nsw <16 x i32> zeroinitializer, splat (i32 64)
24+
%4 = add nuw nsw <16 x i32> %3, zeroinitializer
25+
%5 = add nuw nsw <16 x i32> %4, %2
26+
%6 = sub nsw <16 x i32> %5, zeroinitializer
27+
%7 = ashr <16 x i32> %6, splat (i32 7)
28+
%8 = tail call <16 x i32> @llvm.smin.v16i32(<16 x i32> %7, <16 x i32> splat (i32 255))
29+
%9 = tail call <16 x i32> @llvm.smax.v16i32(<16 x i32> %8, <16 x i32> zeroinitializer)
30+
%10 = trunc <16 x i32> %9 to <16 x i8>
31+
%11 = shufflevector <16 x i8> %10, <16 x i8> poison, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
32+
store <32 x i8> %11, ptr poison, align 1
33+
br i1 poison, label %.exitStub, label %loop
34+
35+
.exitStub: ; preds = %loop
36+
ret void
37+
}
38+
39+
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
40+
attributes #1 = { "min-legal-vector-width"="0" "target-cpu"="tigerlake" "target-features"="+adx,+aes,+avx,+avx2,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vp2intersect,+avx512vpopcntdq,+bmi,+bmi2,+clflushopt,+clwb,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fsgsbase,+fxsr,+gfni,+invpcid,+kl,+lzcnt,+mmx,+movbe,+movdir64b,+movdiri,+pclmul,+pku,+popcnt,+prfchw,+rdpid,+rdrnd,+rdseed,+sahf,+sgx,+sha,+shstk,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+widekl,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" }

0 commit comments

Comments
 (0)