[VectorCombine][X86] Add insert(binop(x,y),binop(a,b),idx) test coverage for #124909

RKSimon · RKSimon · commit 9acaaebcdd39 · 2025-01-30T15:12:17.000Z
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-vector.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-vector.ll
@@ -0,0 +1,140 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64    | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX
+
+; Scalar fdiv inserted into lane 1 of a vector fdiv; the assertions expect the IR to come out unchanged.
+define <2 x double> @insert1_v2f64_f64_fdiv(<2 x double> %v0, <2 x double> %v1, double %s0, double %s1) {
+; CHECK-LABEL: @insert1_v2f64_f64_fdiv(
+; CHECK-NEXT:    [[S:%.*]] = fdiv double [[S0:%.*]], [[S1:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = fdiv <2 x double> [[TMP1:%.*]], [[TMP2:%.*]]
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <2 x double> [[R]], double [[S]], i32 1
+; CHECK-NEXT:    ret <2 x double> [[R1]]
+;
+  %s = fdiv double %s0, %s1
+  %v = fdiv <2 x double> %v0, %v1
+  %r = insertelement <2 x double> %v, double %s, i32 1
+  ret <2 x double> %r
+}
+
+; Scalar add inserted into lane 0 of a <4 x i32> add; the assertions expect the IR to come out unchanged.
+define <4 x i32> @insert0_v4i32_i32_add(<4 x i32> %v0, <4 x i32> %v1, i32 %s0, i32 %s1) {
+; CHECK-LABEL: @insert0_v4i32_i32_add(
+; CHECK-NEXT:    [[S:%.*]] = add i32 [[S0:%.*]], [[S1:%.*]]
+; CHECK-NEXT:    [[V:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x i32> [[V]], i32 [[S]], i32 0
+; CHECK-NEXT:    ret <4 x i32> [[R]]
+;
+  %s = add i32 %s0, %s1
+  %v = add <4 x i32> %v0, %v1
+  %r = insertelement <4 x i32> %v, i32 %s, i32 0
+  ret <4 x i32> %r
+}
+
+; Scalar add inserted into lane 9 of a <16 x i16> add; the assertions expect the IR to come out unchanged.
+define <16 x i16> @insert9_v16i16_i16_add(<16 x i16> %v0, <16 x i16> %v1, i16 %s0, i16 %s1) {
+; CHECK-LABEL: @insert9_v16i16_i16_add(
+; CHECK-NEXT:    [[S:%.*]] = add i16 [[S0:%.*]], [[S1:%.*]]
+; CHECK-NEXT:    [[V:%.*]] = add <16 x i16> [[V0:%.*]], [[V1:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = insertelement <16 x i16> [[V]], i16 [[S]], i32 9
+; CHECK-NEXT:    ret <16 x i16> [[R]]
+;
+  %s = add i16 %s0, %s1
+  %v = add <16 x i16> %v0, %v1
+  %r = insertelement <16 x i16> %v, i16 %s, i32 9
+  ret <16 x i16> %r
+}
+
+; Merge flags - the scalar and the vector fadd both carry 'fast'
+define <4 x float> @insert0_v4f32_f32_fadd_common_flags(<4 x float> %v0, <4 x float> %v1, float %s0, float %s1) {
+; CHECK-LABEL: @insert0_v4f32_f32_fadd_common_flags(
+; CHECK-NEXT:    [[S:%.*]] = fadd fast float [[S0:%.*]], [[S1:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = fadd fast <4 x float> [[TMP1:%.*]], [[TMP2:%.*]]
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <4 x float> [[R]], float [[S]], i32 0
+; CHECK-NEXT:    ret <4 x float> [[R1]]
+;
+  %s = fadd fast float %s0, %s1
+  %v = fadd fast <4 x float> %v0, %v1
+  %r = insertelement <4 x float> %v, float %s, i32 0
+  ret <4 x float> %r
+}
+
+; Merge (shared) flags - only nnan is common (the scalar fsub adds nsz, the vector fsub adds ninf)
+define <4 x float> @insert1_v4f32_f32_fsub_mixed_flags(<4 x float> %v0, <4 x float> %v1, float %s0, float %s1) {
+; CHECK-LABEL: @insert1_v4f32_f32_fsub_mixed_flags(
+; CHECK-NEXT:    [[S:%.*]] = fsub nnan nsz float [[S0:%.*]], [[S1:%.*]]
+; CHECK-NEXT:    [[V:%.*]] = fsub nnan ninf <4 x float> [[V0:%.*]], [[V1:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> [[V]], float [[S]], i32 1
+; CHECK-NEXT:    ret <4 x float> [[R]]
+;
+  %s = fsub nnan nsz float %s0, %s1
+  %v = fsub nnan ninf <4 x float> %v0, %v1
+  %r = insertelement <4 x float> %v, float %s, i32 1
+  ret <4 x float> %r
+}
+
+; TODO: Fold equivalent opcodes (scalar 'add' inserted into a vector 'or disjoint')
+define <4 x i32> @insert0_v4i32_i32_or_disjoint_add(<4 x i32> %v0, <4 x i32> %v1, i32 %s0, i32 %s1) {
+; CHECK-LABEL: @insert0_v4i32_i32_or_disjoint_add(
+; CHECK-NEXT:    [[S:%.*]] = add i32 [[S0:%.*]], [[S1:%.*]]
+; CHECK-NEXT:    [[V:%.*]] = or disjoint <4 x i32> [[V0:%.*]], [[V1:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x i32> [[V]], i32 [[S]], i32 0
+; CHECK-NEXT:    ret <4 x i32> [[R]]
+;
+  %s = add i32 %s0, %s1
+  %v = or disjoint <4 x i32> %v0, %v1
+  %r = insertelement <4 x i32> %v, i32 %s, i32 0
+  ret <4 x i32> %r
+}
+
+; Negative - multi use (the scalar fmul has an extra user: it is also passed to @use_f64)
+define <2 x double> @insert0_v2f64_f64_fmul_multiuse(<2 x double> %v0, <2 x double> %v1, double %s0, double %s1) {
+; CHECK-LABEL: @insert0_v2f64_f64_fmul_multiuse(
+; CHECK-NEXT:    [[S:%.*]] = fmul double [[S0:%.*]], [[S1:%.*]]
+; CHECK-NEXT:    [[V:%.*]] = fmul <2 x double> [[V0:%.*]], [[V1:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x double> [[V]], double [[S]], i32 0
+; CHECK-NEXT:    call void @use_f64(double [[S]])
+; CHECK-NEXT:    ret <2 x double> [[R]]
+;
+  %s = fmul double %s0, %s1
+  %v = fmul <2 x double> %v0, %v1
+  %r = insertelement <2 x double> %v, double %s, i32 0
+  call void @use_f64(double %s)
+  ret <2 x double> %r
+}
+declare void @use_f64(double)
+
+; Negative - multi use (the vector add has an extra user: it is also passed to @use_v2i64)
+define <2 x i64> @insert0_v2i64_i64_add_multiuse(<2 x i64> %v0, <2 x i64> %v1, i64 %s0, i64 %s1) {
+; CHECK-LABEL: @insert0_v2i64_i64_add_multiuse(
+; CHECK-NEXT:    [[S:%.*]] = add i64 [[S0:%.*]], [[S1:%.*]]
+; CHECK-NEXT:    [[V:%.*]] = add <2 x i64> [[V0:%.*]], [[V1:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i64> [[V]], i64 [[S]], i32 0
+; CHECK-NEXT:    call void @use_v2i64(<2 x i64> [[V]])
+; CHECK-NEXT:    ret <2 x i64> [[R]]
+;
+  %s = add i64 %s0, %s1
+  %v = add <2 x i64> %v0, %v1
+  %r = insertelement <2 x i64> %v, i64 %s, i32 0
+  call void @use_v2i64(<2 x i64> %v)
+  ret <2 x i64> %r
+}
+declare void @use_v2i64(<2 x i64>)
+
+; Negative - binop mismatch (scalar fsub inserted into a vector fadd)
+define <2 x double> @insert0_v2f64_f64_fadd_fsub(<2 x double> %v0, <2 x double> %v1, double %s0, double %s1) {
+; CHECK-LABEL: @insert0_v2f64_f64_fadd_fsub(
+; CHECK-NEXT:    [[S:%.*]] = fsub double [[S0:%.*]], [[S1:%.*]]
+; CHECK-NEXT:    [[V:%.*]] = fadd <2 x double> [[V0:%.*]], [[V1:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x double> [[V]], double [[S]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[R]]
+;
+  %s = fsub double %s0, %s1
+  %v = fadd <2 x double> %v0, %v1
+  %r = insertelement <2 x double> %v, double %s, i32 0
+  ret <2 x double> %r
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX: {{.*}}
+; SSE: {{.*}}
+; SSE2: {{.*}}
+; SSE4: {{.*}}