@@ -1391,24 +1391,24 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr
1391
1391
; AVX2-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[IN]], i64 [[TMP1]]
1392
1392
; AVX2-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[IN]], i64 [[TMP2]]
1393
1393
; AVX2-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[IN]], i64 [[TMP3]]
1394
+ ; AVX2-NEXT: [[REVERSE12:%.*]] = shufflevector <4 x i1> [[TMP16]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1395
+ ; AVX2-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i1> [[TMP17]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1396
+ ; AVX2-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i1> [[TMP18]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1397
+ ; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP19]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1394
1398
; AVX2-NEXT: [[TMP24:%.*]] = getelementptr double, ptr [[TMP20]], i32 0
1395
1399
; AVX2-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP24]], i32 -3
1396
- ; AVX2-NEXT: [[REVERSE12:%.*]] = shufflevector <4 x i1> [[TMP16]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1397
1400
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP25]], i32 8, <4 x i1> [[REVERSE12]], <4 x double> poison), !alias.scope !21
1398
1401
; AVX2-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1399
1402
; AVX2-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP20]], i32 -4
1400
1403
; AVX2-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP26]], i32 -3
1401
- ; AVX2-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i1> [[TMP17]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1402
1404
; AVX2-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP27]], i32 8, <4 x i1> [[REVERSE14]], <4 x double> poison), !alias.scope !21
1403
1405
; AVX2-NEXT: [[REVERSE16:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD15]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1404
1406
; AVX2-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP20]], i32 -8
1405
1407
; AVX2-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 -3
1406
- ; AVX2-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i1> [[TMP18]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1407
1408
; AVX2-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP29]], i32 8, <4 x i1> [[REVERSE17]], <4 x double> poison), !alias.scope !21
1408
1409
; AVX2-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD18]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1409
1410
; AVX2-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP20]], i32 -12
1410
1411
; AVX2-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP30]], i32 -3
1411
- ; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP19]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1412
1412
; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP31]], i32 8, <4 x i1> [[REVERSE20]], <4 x double> poison), !alias.scope !21
1413
1413
; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1414
1414
; AVX2-NEXT: [[TMP32:%.*]] = fadd <4 x double> [[REVERSE13]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
@@ -1515,24 +1515,24 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr
1515
1515
; AVX512-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[IN]], i64 [[TMP1]]
1516
1516
; AVX512-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[IN]], i64 [[TMP2]]
1517
1517
; AVX512-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[IN]], i64 [[TMP3]]
1518
+ ; AVX512-NEXT: [[REVERSE12:%.*]] = shufflevector <8 x i1> [[TMP16]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1519
+ ; AVX512-NEXT: [[REVERSE14:%.*]] = shufflevector <8 x i1> [[TMP17]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1520
+ ; AVX512-NEXT: [[REVERSE17:%.*]] = shufflevector <8 x i1> [[TMP18]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1521
+ ; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP19]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1518
1522
; AVX512-NEXT: [[TMP24:%.*]] = getelementptr double, ptr [[TMP20]], i32 0
1519
1523
; AVX512-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP24]], i32 -7
1520
- ; AVX512-NEXT: [[REVERSE12:%.*]] = shufflevector <8 x i1> [[TMP16]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1521
1524
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP25]], i32 8, <8 x i1> [[REVERSE12]], <8 x double> poison), !alias.scope !34
1522
1525
; AVX512-NEXT: [[REVERSE13:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1523
1526
; AVX512-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP20]], i32 -8
1524
1527
; AVX512-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP26]], i32 -7
1525
- ; AVX512-NEXT: [[REVERSE14:%.*]] = shufflevector <8 x i1> [[TMP17]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1526
1528
; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP27]], i32 8, <8 x i1> [[REVERSE14]], <8 x double> poison), !alias.scope !34
1527
1529
; AVX512-NEXT: [[REVERSE16:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD15]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1528
1530
; AVX512-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP20]], i32 -16
1529
1531
; AVX512-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 -7
1530
- ; AVX512-NEXT: [[REVERSE17:%.*]] = shufflevector <8 x i1> [[TMP18]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1531
1532
; AVX512-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP29]], i32 8, <8 x i1> [[REVERSE17]], <8 x double> poison), !alias.scope !34
1532
1533
; AVX512-NEXT: [[REVERSE19:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD18]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1533
1534
; AVX512-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP20]], i32 -24
1534
1535
; AVX512-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP30]], i32 -7
1535
- ; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP19]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1536
1536
; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP31]], i32 8, <8 x i1> [[REVERSE20]], <8 x double> poison), !alias.scope !34
1537
1537
; AVX512-NEXT: [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD21]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1538
1538
; AVX512-NEXT: [[TMP32:%.*]] = fadd <8 x double> [[REVERSE13]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
0 commit comments