Skip to content

Commit 9acaaeb

Browse files
committed
[VectorCombine][X86] Add insert(binop(x,y),binop(a,b),idx) test coverage for #124909
1 parent 38cb693 commit 9acaaeb

File tree

1 file changed

+140
-0
lines changed

1 file changed

+140
-0
lines changed
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
3+
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4
4+
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX
5+
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX
6+
7+
; Inserts a scalar fdiv into lane 1 of a <2 x double> fdiv (same opcode on
; both sides, no fast-math flags). The assertions below expect a scalar fdiv,
; a vector fdiv and an insertelement at index 1, in that order.
define <2 x double> @insert1_v2f64_f64_fdiv(<2 x double> %v0, <2 x double> %v1, double %s0, double %s1) {
8+
; CHECK-LABEL: @insert1_v2f64_f64_fdiv(
9+
; CHECK-NEXT: [[S:%.*]] = fdiv double [[S0:%.*]], [[S1:%.*]]
10+
; CHECK-NEXT: [[R:%.*]] = fdiv <2 x double> [[TMP1:%.*]], [[TMP2:%.*]]
11+
; CHECK-NEXT: [[R1:%.*]] = insertelement <2 x double> [[R]], double [[S]], i32 1
12+
; CHECK-NEXT: ret <2 x double> [[R1]]
13+
;
14+
%s = fdiv double %s0, %s1
15+
%v = fdiv <2 x double> %v0, %v1
16+
%r = insertelement <2 x double> %v, double %s, i32 1
17+
ret <2 x double> %r
18+
}
19+
20+
; Inserts a scalar i32 add into lane 0 of a <4 x i32> add (same opcode on
; both sides). The assertions below expect the scalar add, the vector add and
; the insertelement at index 0 to appear in the output in that order.
define <4 x i32> @insert0_v4i32_i32_add(<4 x i32> %v0, <4 x i32> %v1, i32 %s0, i32 %s1) {
21+
; CHECK-LABEL: @insert0_v4i32_i32_add(
22+
; CHECK-NEXT: [[S:%.*]] = add i32 [[S0:%.*]], [[S1:%.*]]
23+
; CHECK-NEXT: [[V:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]]
24+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> [[V]], i32 [[S]], i32 0
25+
; CHECK-NEXT: ret <4 x i32> [[R]]
26+
;
27+
%s = add i32 %s0, %s1
28+
%v = add <4 x i32> %v0, %v1
29+
%r = insertelement <4 x i32> %v, i32 %s, i32 0
30+
ret <4 x i32> %r
31+
}
32+
33+
; Inserts a scalar i16 add into lane 9 of a <16 x i16> add (same opcode on
; both sides). The assertions below expect the scalar add, the vector add and
; the insertelement at index 9 to appear in the output in that order.
define <16 x i16> @insert9_v16i16_i16_add(<16 x i16> %v0, <16 x i16> %v1, i16 %s0, i16 %s1) {
34+
; CHECK-LABEL: @insert9_v16i16_i16_add(
35+
; CHECK-NEXT: [[S:%.*]] = add i16 [[S0:%.*]], [[S1:%.*]]
36+
; CHECK-NEXT: [[V:%.*]] = add <16 x i16> [[V0:%.*]], [[V1:%.*]]
37+
; CHECK-NEXT: [[R:%.*]] = insertelement <16 x i16> [[V]], i16 [[S]], i32 9
38+
; CHECK-NEXT: ret <16 x i16> [[R]]
39+
;
40+
%s = add i16 %s0, %s1
41+
%v = add <16 x i16> %v0, %v1
42+
%r = insertelement <16 x i16> %v, i16 %s, i32 9
43+
ret <16 x i16> %r
44+
}
45+
46+
; Merge flags: the scalar fadd and the vector fadd both carry `fast`, and the
; assertions below expect `fast` on both operations in the output.
47+
define <4 x float> @insert0_v4f32_f32_fadd_common_flags(<4 x float> %v0, <4 x float> %v1, float %s0, float %s1) {
48+
; CHECK-LABEL: @insert0_v4f32_f32_fadd_common_flags(
49+
; CHECK-NEXT: [[S:%.*]] = fadd fast float [[S0:%.*]], [[S1:%.*]]
50+
; CHECK-NEXT: [[R:%.*]] = fadd fast <4 x float> [[TMP1:%.*]], [[TMP2:%.*]]
51+
; CHECK-NEXT: [[R1:%.*]] = insertelement <4 x float> [[R]], float [[S]], i32 0
52+
; CHECK-NEXT: ret <4 x float> [[R1]]
53+
;
54+
%s = fadd fast float %s0, %s1
55+
%v = fadd fast <4 x float> %v0, %v1
56+
%r = insertelement <4 x float> %v, float %s, i32 0
57+
ret <4 x float> %r
58+
}
59+
60+
; Merge (shared) flags: the scalar fsub is `nnan nsz` and the vector fsub is
; `nnan ninf`, so `nnan` is the only flag the two operations have in common.
; The assertions below expect each operation to keep its own flag set.
61+
define <4 x float> @insert1_v4f32_f32_fsub_mixed_flags(<4 x float> %v0, <4 x float> %v1, float %s0, float %s1) {
62+
; CHECK-LABEL: @insert1_v4f32_f32_fsub_mixed_flags(
63+
; CHECK-NEXT: [[S:%.*]] = fsub nnan nsz float [[S0:%.*]], [[S1:%.*]]
64+
; CHECK-NEXT: [[V:%.*]] = fsub nnan ninf <4 x float> [[V0:%.*]], [[V1:%.*]]
65+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[V]], float [[S]], i32 1
66+
; CHECK-NEXT: ret <4 x float> [[R]]
67+
;
68+
%s = fsub nnan nsz float %s0, %s1
69+
%v = fsub nnan ninf <4 x float> %v0, %v1
70+
%r = insertelement <4 x float> %v, float %s, i32 1
71+
ret <4 x float> %r
72+
}
73+
74+
; TODO: Fold equivalent opcodes - here the scalar op is `add` while the vector
; op is `or disjoint`. The assertions below expect both to be left as written.
75+
define <4 x i32> @insert0_v4i32_i32_or_disjoint_add(<4 x i32> %v0, <4 x i32> %v1, i32 %s0, i32 %s1) {
76+
; CHECK-LABEL: @insert0_v4i32_i32_or_disjoint_add(
77+
; CHECK-NEXT: [[S:%.*]] = add i32 [[S0:%.*]], [[S1:%.*]]
78+
; CHECK-NEXT: [[V:%.*]] = or disjoint <4 x i32> [[V0:%.*]], [[V1:%.*]]
79+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> [[V]], i32 [[S]], i32 0
80+
; CHECK-NEXT: ret <4 x i32> [[R]]
81+
;
82+
%s = add i32 %s0, %s1
83+
%v = or disjoint <4 x i32> %v0, %v1
84+
%r = insertelement <4 x i32> %v, i32 %s, i32 0
85+
ret <4 x i32> %r
86+
}
87+
88+
; Negative - multi use: the scalar fmul %s is also passed to @use_f64, so it
; has a use other than the insertelement. The assertions below expect all
; instructions, including the extra call, to remain.
89+
define <2 x double> @insert0_v2f64_f64_fmul_multiuse(<2 x double> %v0, <2 x double> %v1, double %s0, double %s1) {
90+
; CHECK-LABEL: @insert0_v2f64_f64_fmul_multiuse(
91+
; CHECK-NEXT: [[S:%.*]] = fmul double [[S0:%.*]], [[S1:%.*]]
92+
; CHECK-NEXT: [[V:%.*]] = fmul <2 x double> [[V0:%.*]], [[V1:%.*]]
93+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x double> [[V]], double [[S]], i32 0
94+
; CHECK-NEXT: call void @use_f64(double [[S]])
95+
; CHECK-NEXT: ret <2 x double> [[R]]
96+
;
97+
%s = fmul double %s0, %s1
98+
%v = fmul <2 x double> %v0, %v1
99+
%r = insertelement <2 x double> %v, double %s, i32 0
100+
call void @use_f64(double %s)
101+
ret <2 x double> %r
102+
}
103+
; External user of a scalar double. The parameter type is `double` so that the
; declaration agrees with the call in @insert0_v2f64_f64_fmul_multiuse, which
; passes the scalar fmul result rather than a <2 x double> vector.
declare void @use_f64(double)
104+
105+
; Negative - multi use: the vector add %v is also passed to @use_v2i64, so it
; has a use other than the insertelement. The assertions below expect all
; instructions, including the extra call, to remain.
106+
define <2 x i64> @insert0_v2i64_i64_add_multiuse(<2 x i64> %v0, <2 x i64> %v1, i64 %s0, i64 %s1) {
107+
; CHECK-LABEL: @insert0_v2i64_i64_add_multiuse(
108+
; CHECK-NEXT: [[S:%.*]] = add i64 [[S0:%.*]], [[S1:%.*]]
109+
; CHECK-NEXT: [[V:%.*]] = add <2 x i64> [[V0:%.*]], [[V1:%.*]]
110+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> [[V]], i64 [[S]], i32 0
111+
; CHECK-NEXT: call void @use_v2i64(<2 x i64> [[V]])
112+
; CHECK-NEXT: ret <2 x i64> [[R]]
113+
;
114+
%s = add i64 %s0, %s1
115+
%v = add <2 x i64> %v0, %v1
116+
%r = insertelement <2 x i64> %v, i64 %s, i32 0
117+
call void @use_v2i64(<2 x i64> %v)
118+
ret <2 x i64> %r
119+
}
120+
declare void @use_v2i64(<2 x i64>)
121+
122+
; Negative - binop mismatch: the scalar op is fsub but the vector op is fadd.
; The assertions below expect both operations and the insertelement to remain.
123+
define <2 x double> @insert0_v2f64_f64_fadd_fsub(<2 x double> %v0, <2 x double> %v1, double %s0, double %s1) {
124+
; CHECK-LABEL: @insert0_v2f64_f64_fadd_fsub(
125+
; CHECK-NEXT: [[S:%.*]] = fsub double [[S0:%.*]], [[S1:%.*]]
126+
; CHECK-NEXT: [[V:%.*]] = fadd <2 x double> [[V0:%.*]], [[V1:%.*]]
127+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x double> [[V]], double [[S]], i32 0
128+
; CHECK-NEXT: ret <2 x double> [[R]]
129+
;
130+
%s = fsub double %s0, %s1
131+
%v = fadd <2 x double> %v0, %v1
132+
%r = insertelement <2 x double> %v, double %s, i32 0
133+
ret <2 x double> %r
134+
}
135+
136+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
137+
; AVX: {{.*}}
138+
; SSE: {{.*}}
139+
; SSE2: {{.*}}
140+
; SSE4: {{.*}}

0 commit comments

Comments
 (0)