Skip to content

Commit f50e881

Browse files
[AArch64] Generate zeroing forms of certain SVE2.2 instructions (7/11)
1 parent 9508d5e commit f50e881

File tree

3 files changed: +152 −2 lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4277,7 +4277,7 @@ let Predicates = [HasSVE2p2orSME2p2] in {
42774277
defm SCVTF_ZPzZ : sve_fp_z2op_p_zd_c<0b0, "scvtf", "int_aarch64_sve_scvtf", AArch64scvtf_mt>;
42784278
defm UCVTF_ZPzZ : sve_fp_z2op_p_zd_c<0b1, "ucvtf", "int_aarch64_sve_ucvtf", AArch64ucvtf_mt>;
42794279
// Signed integer base 2 logarithm of fp value, zeroing predicate
4280-
defm FLOGB_ZPzZ : sve_fp_z2op_p_zd_d_flogb<"flogb">;
4280+
defm FLOGB_ZPzZ : sve_fp_z2op_p_zd_d_flogb<"flogb", int_aarch64_sve_flogb>;
42814281

42824282
// SVE2 integer unary operations, zeroing predicate
42834283
def URECPE_ZPzZ : sve2_int_un_pred_arit_z<0b10, 0b00, "urecpe", ZPR32>;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3331,10 +3331,14 @@ multiclass sve_fp_z2op_p_zd_c<bit U, string asm, string int_op, SDPatternOperato
33313331
def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, ir_op, nxv2i1,nxv2i64, !cast<Instruction>(NAME # _DtoD)>;
33323332
}
33333333

3334-
multiclass sve_fp_z2op_p_zd_d_flogb<string asm> {
3334+
multiclass sve_fp_z2op_p_zd_d_flogb<string asm, SDPatternOperator op> {
33353335
def _H : sve_fp_z2op_p_zd<0b0011001, asm, ZPR16, ZPR16>;
33363336
def _S : sve_fp_z2op_p_zd<0b0011010, asm, ZPR32, ZPR32>;
33373337
def _D : sve_fp_z2op_p_zd<0b0011011, asm, ZPR64, ZPR64>;
3338+
3339+
def : SVE_3_Op_UndefZero_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
3340+
def : SVE_3_Op_UndefZero_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
3341+
def : SVE_3_Op_UndefZero_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
33383342
}
33393343

33403344
multiclass sve_fp_z2op_p_zd_b_0<string asm, string op> {
[New test file — path not captured in this extract]

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mattr=+sve2 < %s | FileCheck %s
3+
; RUN: llc -mattr=+sve2p2 < %s | FileCheck %s -check-prefix CHECK-2p2
4+
5+
; RUN: llc -mattr=+sme2 -force-streaming < %s | FileCheck %s
6+
; RUN: llc -mattr=+sme2p2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-2p2
7+
8+
target triple = "aarch64-linux"
9+
10+
define <vscale x 8 x i16> @test_svlogb_f16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x half> %x) {
11+
; CHECK-LABEL: test_svlogb_f16_x_1:
12+
; CHECK: // %bb.0: // %entry
13+
; CHECK-NEXT: flogb z0.h, p0/m, z0.h
14+
; CHECK-NEXT: ret
15+
;
16+
; CHECK-2p2-LABEL: test_svlogb_f16_x_1:
17+
; CHECK-2p2: // %bb.0: // %entry
18+
; CHECK-2p2-NEXT: flogb z0.h, p0/z, z0.h
19+
; CHECK-2p2-NEXT: ret
20+
entry:
21+
%0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
22+
ret <vscale x 8 x i16> %0
23+
}
24+
25+
define <vscale x 8 x i16> @test_svlogb_f16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
26+
; CHECK-LABEL: test_svlogb_f16_x_2:
27+
; CHECK: // %bb.0: // %entry
28+
; CHECK-NEXT: flogb z0.h, p0/m, z1.h
29+
; CHECK-NEXT: ret
30+
;
31+
; CHECK-2p2-LABEL: test_svlogb_f16_x_2:
32+
; CHECK-2p2: // %bb.0: // %entry
33+
; CHECK-2p2-NEXT: flogb z0.h, p0/z, z1.h
34+
; CHECK-2p2-NEXT: ret
35+
entry:
36+
%0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
37+
ret <vscale x 8 x i16> %0
38+
}
39+
40+
define <vscale x 8 x i16> @test_svlogb_f16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
41+
; CHECK-LABEL: test_svlogb_f16_z:
42+
; CHECK: // %bb.0: // %entry
43+
; CHECK-NEXT: mov z0.h, #0 // =0x0
44+
; CHECK-NEXT: flogb z0.h, p0/m, z1.h
45+
; CHECK-NEXT: ret
46+
;
47+
; CHECK-2p2-LABEL: test_svlogb_f16_z:
48+
; CHECK-2p2: // %bb.0: // %entry
49+
; CHECK-2p2-NEXT: flogb z0.h, p0/z, z1.h
50+
; CHECK-2p2-NEXT: ret
51+
entry:
52+
%0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
53+
ret <vscale x 8 x i16> %0
54+
}
55+
56+
define <vscale x 4 x i32> @test_svlogb_f32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
57+
; CHECK-LABEL: test_svlogb_f32_x_1:
58+
; CHECK: // %bb.0: // %entry
59+
; CHECK-NEXT: flogb z0.s, p0/m, z0.s
60+
; CHECK-NEXT: ret
61+
;
62+
; CHECK-2p2-LABEL: test_svlogb_f32_x_1:
63+
; CHECK-2p2: // %bb.0: // %entry
64+
; CHECK-2p2-NEXT: flogb z0.s, p0/z, z0.s
65+
; CHECK-2p2-NEXT: ret
66+
entry:
67+
%0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
68+
ret <vscale x 4 x i32> %0
69+
}
70+
71+
define <vscale x 4 x i32> @test_svlogb_f32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
72+
; CHECK-LABEL: test_svlogb_f32_x_2:
73+
; CHECK: // %bb.0: // %entry
74+
; CHECK-NEXT: flogb z0.s, p0/m, z1.s
75+
; CHECK-NEXT: ret
76+
;
77+
; CHECK-2p2-LABEL: test_svlogb_f32_x_2:
78+
; CHECK-2p2: // %bb.0: // %entry
79+
; CHECK-2p2-NEXT: flogb z0.s, p0/z, z1.s
80+
; CHECK-2p2-NEXT: ret
81+
entry:
82+
%0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
83+
ret <vscale x 4 x i32> %0
84+
}
85+
86+
define <vscale x 4 x i32> @test_svlogb_f32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
87+
; CHECK-LABEL: test_svlogb_f32_z:
88+
; CHECK: // %bb.0: // %entry
89+
; CHECK-NEXT: mov z0.s, #0 // =0x0
90+
; CHECK-NEXT: flogb z0.s, p0/m, z1.s
91+
; CHECK-NEXT: ret
92+
;
93+
; CHECK-2p2-LABEL: test_svlogb_f32_z:
94+
; CHECK-2p2: // %bb.0: // %entry
95+
; CHECK-2p2-NEXT: flogb z0.s, p0/z, z1.s
96+
; CHECK-2p2-NEXT: ret
97+
entry:
98+
%0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
99+
ret <vscale x 4 x i32> %0
100+
}
101+
102+
define <vscale x 2 x i64> @test_svlogb_f64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x double> %x) {
103+
; CHECK-LABEL: test_svlogb_f64_x_1:
104+
; CHECK: // %bb.0: // %entry
105+
; CHECK-NEXT: flogb z0.d, p0/m, z0.d
106+
; CHECK-NEXT: ret
107+
;
108+
; CHECK-2p2-LABEL: test_svlogb_f64_x_1:
109+
; CHECK-2p2: // %bb.0: // %entry
110+
; CHECK-2p2-NEXT: flogb z0.d, p0/z, z0.d
111+
; CHECK-2p2-NEXT: ret
112+
entry:
113+
%0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
114+
ret <vscale x 2 x i64> %0
115+
}
116+
117+
define <vscale x 2 x i64> @test_svlogb_f64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
118+
; CHECK-LABEL: test_svlogb_f64_x_2:
119+
; CHECK: // %bb.0: // %entry
120+
; CHECK-NEXT: flogb z0.d, p0/m, z1.d
121+
; CHECK-NEXT: ret
122+
;
123+
; CHECK-2p2-LABEL: test_svlogb_f64_x_2:
124+
; CHECK-2p2: // %bb.0: // %entry
125+
; CHECK-2p2-NEXT: flogb z0.d, p0/z, z1.d
126+
; CHECK-2p2-NEXT: ret
127+
entry:
128+
%0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
129+
ret <vscale x 2 x i64> %0
130+
}
131+
132+
define <vscale x 2 x i64> @test_svlogb_f64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
133+
; CHECK-LABEL: test_svlogb_f64_z:
134+
; CHECK: // %bb.0: // %entry
135+
; CHECK-NEXT: mov z0.d, #0 // =0x0
136+
; CHECK-NEXT: flogb z0.d, p0/m, z1.d
137+
; CHECK-NEXT: ret
138+
;
139+
; CHECK-2p2-LABEL: test_svlogb_f64_z:
140+
; CHECK-2p2: // %bb.0: // %entry
141+
; CHECK-2p2-NEXT: flogb z0.d, p0/z, z1.d
142+
; CHECK-2p2-NEXT: ret
143+
entry:
144+
%0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
145+
ret <vscale x 2 x i64> %0
146+
}

0 commit comments

Comments
 (0)