@@ -9234,49 +9234,78 @@ uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
9234
9234
// return vqdmull_s32(a, b);
9235
9235
// }
9236
9236
9237
- // NYI-LABEL: @test_vqdmlal_s16(
9238
- // NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
9239
- // NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
9240
- // NYI: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
9241
- // NYI: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
9242
- // NYI: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
9243
- // NYI: ret <4 x i32> [[VQDMLAL_V3_I]]
9244
- // int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
9245
- // return vqdmlal_s16(a, b, c);
9246
- // }
9237
+ int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
9238
+ return vqdmlal_s16(a, b, c);
9247
9239
9248
- // NYI-LABEL: @test_vqdmlal_s32(
9249
- // NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
9250
- // NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
9251
- // NYI: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
9252
- // NYI: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
9253
- // NYI: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
9254
- // NYI: ret <2 x i64> [[VQDMLAL_V3_I]]
9255
- // int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
9256
- // return vqdmlal_s32(a, b, c);
9257
- // }
9240
+ // CIR-LABEL: vqdmlal_s16
9241
+ // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqdmull" {{%.*}}, {{%.*}} :
9242
+ // CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s32i x 4>
9243
+ // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqadd" {{%.*}}, {{%.*}} :
9244
+ // CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
9258
9245
9259
- // NYI-LABEL: @test_vqdmlsl_s16(
9260
- // NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
9261
- // NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
9262
- // NYI: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
9263
- // NYI: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
9264
- // NYI: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
9265
- // NYI: ret <4 x i32> [[VQDMLSL_V3_I]]
9266
- // int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
9267
- // return vqdmlsl_s16(a, b, c);
9268
- // }
9246
+ // LLVM: {{.*}}test_vqdmlal_s16(<4 x i32>{{.*}}[[a:%.*]], <4 x i16>{{.*}}[[b:%.*]], <4 x i16>{{.*}}[[c:%.*]])
9247
+ // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[a]] to <16 x i8>
9248
+ // LLVM: [[TMP1:%.*]] = bitcast <4 x i16> [[b]] to <8 x i8>
9249
+ // LLVM: [[TMP2:%.*]] = bitcast <4 x i16> [[c]] to <8 x i8>
9250
+ // LLVM: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[b]], <4 x i16> [[c]])
9251
+ // LLVM: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[a]], <4 x i32> [[VQDMLAL2_I]])
9252
+ // LLVM: ret <4 x i32> [[VQDMLAL_V3_I]]
9253
+ }
9269
9254
9270
- // NYI-LABEL: @test_vqdmlsl_s32(
9271
- // NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
9272
- // NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
9273
- // NYI: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
9274
- // NYI: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
9275
- // NYI: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
9276
- // NYI: ret <2 x i64> [[VQDMLSL_V3_I]]
9277
- // int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
9278
- // return vqdmlsl_s32(a, b, c);
9279
- // }
9255
+ int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
9256
+ return vqdmlal_s32(a, b, c);
9257
+
9258
+ // CIR-LABEL: vqdmlal_s32
9259
+ // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqdmull" {{%.*}}, {{%.*}} :
9260
+ // CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s64i x 2>
9261
+ // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqadd" {{%.*}}, {{%.*}} :
9262
+ // CIR-SAME: (!cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
9263
+
9264
+ // LLVM: {{.*}}test_vqdmlal_s32(<2 x i64>{{.*}}[[a:%.*]], <2 x i32>{{.*}}[[b:%.*]], <2 x i32>{{.*}}[[c:%.*]])
9265
+ // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[a]] to <16 x i8>
9266
+ // LLVM: [[TMP1:%.*]] = bitcast <2 x i32> [[b]] to <8 x i8>
9267
+ // LLVM: [[TMP2:%.*]] = bitcast <2 x i32> [[c]] to <8 x i8>
9268
+ // LLVM: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[b]], <2 x i32> [[c]])
9269
+ // LLVM: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[a]], <2 x i64> [[VQDMLAL2_I]])
9270
+ // LLVM: ret <2 x i64> [[VQDMLAL_V3_I]]
9271
+ }
9272
+
9273
+
9274
+ int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
9275
+ return vqdmlsl_s16(a, b, c);
9276
+
9277
+ // CIR-LABEL: vqdmlsl_s16
9278
+ // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqdmull" {{%.*}}, {{%.*}} :
9279
+ // CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s32i x 4>
9280
+ // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqsub" {{%.*}}, {{%.*}} :
9281
+ // CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
9282
+
9283
+ // LLVM: {{.*}}test_vqdmlsl_s16(<4 x i32>{{.*}}[[a:%.*]], <4 x i16>{{.*}}[[b:%.*]], <4 x i16>{{.*}}[[c:%.*]])
9284
+ // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[a]] to <16 x i8>
9285
+ // LLVM: [[TMP1:%.*]] = bitcast <4 x i16> [[b]] to <8 x i8>
9286
+ // LLVM: [[TMP2:%.*]] = bitcast <4 x i16> [[c]] to <8 x i8>
9287
+ // LLVM: [[VQDMLSL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[b]], <4 x i16> [[c]])
9288
+ // LLVM: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[a]], <4 x i32> [[VQDMLSL2_I]])
9289
+ // LLVM: ret <4 x i32> [[VQDMLSL_V3_I]]
9290
+ }
9291
+
9292
+ int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
9293
+ return vqdmlsl_s32(a, b, c);
9294
+
9295
+ // CIR-LABEL: vqdmlsl_s32
9296
+ // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqdmull" {{%.*}}, {{%.*}} :
9297
+ // CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s64i x 2>
9298
+ // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqsub" {{%.*}}, {{%.*}} :
9299
+ // CIR-SAME: (!cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
9300
+
9301
+ // LLVM: {{.*}}test_vqdmlsl_s32(<2 x i64>{{.*}}[[a:%.*]], <2 x i32>{{.*}}[[b:%.*]], <2 x i32>{{.*}}[[c:%.*]])
9302
+ // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[a]] to <16 x i8>
9303
+ // LLVM: [[TMP1:%.*]] = bitcast <2 x i32> [[b]] to <8 x i8>
9304
+ // LLVM: [[TMP2:%.*]] = bitcast <2 x i32> [[c]] to <8 x i8>
9305
+ // LLVM: [[VQDMLSL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[b]], <2 x i32> [[c]])
9306
+ // LLVM: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[a]], <2 x i64> [[VQDMLSL2_I]])
9307
+ // LLVM: ret <2 x i64> [[VQDMLSL_V3_I]]
9308
+ }
9280
9309
9281
9310
// NYI-LABEL: @test_vqdmull_high_s16(
9282
9311
// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
0 commit comments