Skip to content

Commit 7963d9a

Browse files
authored
[X86][EVEX512] Add HasEVEX512 when NoVLX used for 512-bit patterns (#91106)
With KNL/KNC being deprecated, we no longer need to care about these NoVLX cases. We may remove such patterns in the future. Fixes #90844.
1 parent d145f40 commit 7963d9a

File tree

3 files changed

+43
-22
lines changed

3 files changed

+43
-22
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30058,7 +30058,9 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
3005830058
return R;
3005930059

3006030060
// AVX512 implicitly uses modulo rotation amounts.
30061-
if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) {
30061+
if ((Subtarget.hasVLX() ||
30062+
(Subtarget.hasAVX512() && Subtarget.hasEVEX512())) &&
30063+
32 <= EltSizeInBits) {
3006230064
// Attempt to rotate by immediate.
3006330065
if (IsCstSplat) {
3006430066
unsigned RotOpc = IsROTL ? X86ISD::VROTLI : X86ISD::VROTRI;

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -826,7 +826,7 @@ defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32bf16_info, v16bf16x_info,
826826

827827
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
828828
// smaller extract to enable EVEX->VEX.
829-
let Predicates = [NoVLX] in {
829+
let Predicates = [NoVLX, HasEVEX512] in {
830830
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
831831
(v2i64 (VEXTRACTI128rr
832832
(v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
@@ -3080,7 +3080,7 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
30803080
addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
30813081
}
30823082

3083-
let Predicates = [HasAVX512, NoVLX] in {
3083+
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
30843084
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
30853085
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
30863086

@@ -3111,7 +3111,7 @@ let Predicates = [HasAVX512, NoVLX] in {
31113111
defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
31123112
}
31133113

3114-
let Predicates = [HasBWI, NoVLX] in {
3114+
let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
31153115
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
31163116
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
31173117

@@ -3505,7 +3505,7 @@ multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
35053505

35063506
// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
35073507
// available. Use a 512-bit operation and extract.
3508-
let Predicates = [HasAVX512, NoVLX] in {
3508+
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
35093509
defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
35103510
defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
35113511
defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
@@ -3517,7 +3517,7 @@ let Predicates = [HasAVX512, NoVLX] in {
35173517
defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
35183518
}
35193519

3520-
let Predicates = [HasBWI, NoVLX] in {
3520+
let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
35213521
defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
35223522
defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
35233523

@@ -5010,8 +5010,8 @@ defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
50105010
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
50115011
SchedWriteVecALU, HasAVX512, 1>, T8;
50125012

5013-
// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
5014-
let Predicates = [HasDQI, NoVLX] in {
5013+
// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX, HasEVEX512.
5014+
let Predicates = [HasDQI, NoVLX, HasEVEX512] in {
50155015
def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
50165016
(EXTRACT_SUBREG
50175017
(VPMULLQZrr
@@ -5067,7 +5067,7 @@ multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
50675067
sub_xmm)>;
50685068
}
50695069

5070-
let Predicates = [HasAVX512, NoVLX] in {
5070+
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
50715071
defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
50725072
defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
50735073
defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
@@ -6044,7 +6044,7 @@ defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
60446044
SchedWriteVecShift>;
60456045

60466046
// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
6047-
let Predicates = [HasAVX512, NoVLX] in {
6047+
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
60486048
def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
60496049
(EXTRACT_SUBREG (v8i64
60506050
(VPSRAQZrr
@@ -6173,14 +6173,14 @@ defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecS
61736173
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
61746174
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
61756175

6176-
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6177-
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6178-
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6179-
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6176+
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX, HasEVEX512]>;
6177+
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX, HasEVEX512]>;
6178+
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX, HasEVEX512]>;
6179+
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX, HasEVEX512]>;
61806180

61816181

61826182
// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
6183-
let Predicates = [HasAVX512, NoVLX] in {
6183+
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
61846184
def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
61856185
(EXTRACT_SUBREG (v8i64
61866186
(VPROLVQZrr
@@ -6231,7 +6231,7 @@ let Predicates = [HasAVX512, NoVLX] in {
62316231
}
62326232

62336233
// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
6234-
let Predicates = [HasAVX512, NoVLX] in {
6234+
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
62356235
def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
62366236
(EXTRACT_SUBREG (v8i64
62376237
(VPRORVQZrr
@@ -9863,7 +9863,7 @@ defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
98639863
truncstore_us_vi8, masked_truncstore_us_vi8,
98649864
X86vtruncus, X86vmtruncus>;
98659865

9866-
let Predicates = [HasAVX512, NoVLX] in {
9866+
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
98679867
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
98689868
(v8i16 (EXTRACT_SUBREG
98699869
(v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
@@ -9874,7 +9874,7 @@ def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
98749874
VR256X:$src, sub_ymm)))), sub_xmm))>;
98759875
}
98769876

9877-
let Predicates = [HasBWI, NoVLX] in {
9877+
let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
98789878
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
98799879
(v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
98809880
VR256X:$src, sub_ymm))), sub_xmm))>;
@@ -10417,7 +10417,7 @@ multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
1041710417
defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
1041810418
EVEX_V128;
1041910419
}
10420-
let Predicates = [prd, NoVLX] in {
10420+
let Predicates = [prd, NoVLX, HasEVEX512] in {
1042110421
defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
1042210422
defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
1042310423
}
@@ -11204,7 +11204,7 @@ defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
1120411204
SchedWriteVecALU>;
1120511205

1120611206
// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
11207-
let Predicates = [HasAVX512, NoVLX] in {
11207+
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
1120811208
def : Pat<(v4i64 (abs VR256X:$src)),
1120911209
(EXTRACT_SUBREG
1121011210
(VPABSQZrr
@@ -11220,7 +11220,7 @@ let Predicates = [HasAVX512, NoVLX] in {
1122011220
// Use 512bit version to implement 128/256 bit.
1122111221
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
1122211222
AVX512VLVectorVTInfo _, Predicate prd> {
11223-
let Predicates = [prd, NoVLX] in {
11223+
let Predicates = [prd, NoVLX, HasEVEX512] in {
1122411224
def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
1122511225
(EXTRACT_SUBREG
1122611226
(!cast<Instruction>(InstrStr # "Zrr")
@@ -11839,7 +11839,7 @@ let Predicates = [HasAVX512] in {
1183911839
(VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
1184011840
}
1184111841

11842-
let Predicates = [HasAVX512, NoVLX] in {
11842+
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
1184311843
def : Pat<(v16i8 (vnot VR128X:$src)),
1184411844
(EXTRACT_SUBREG
1184511845
(VPTERNLOGQZrri

llvm/test/CodeGen/X86/pr90844.ll

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-evex512 < %s | FileCheck %s
3+
4+
define void @PR90844() {
5+
; CHECK-LABEL: PR90844:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
8+
; CHECK-NEXT: vmovaps %xmm0, (%rax)
9+
; CHECK-NEXT: retq
10+
entry:
11+
%0 = tail call <2 x i32> @llvm.fshl.v2i32(<2 x i32> poison, <2 x i32> poison, <2 x i32> <i32 8, i32 24>)
12+
%1 = and <2 x i32> %0, <i32 16711935, i32 -134152448>
13+
%2 = or disjoint <2 x i32> zeroinitializer, %1
14+
%3 = zext <2 x i32> %2 to <2 x i64>
15+
%4 = shl nuw <2 x i64> %3, <i64 32, i64 32>
16+
%5 = or disjoint <2 x i64> %4, zeroinitializer
17+
store <2 x i64> %5, ptr poison, align 16
18+
ret void
19+
}

0 commit comments

Comments
 (0)