@@ -424,6 +424,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                           {X86::VMOVSDZrm, 1, 64, rebuildZeroUpperCst},
                           {X86::VMOVDDUPZ128rm, 1, 64, rebuildSplatCst}},
                          128, 1);
+  case X86::VMOVAPDZ128rmk:
+  case X86::VMOVUPDZ128rmk:
+    return FixupConstant({{X86::VMOVSDZrmk, 1, 64, rebuildZeroUpperCst},
+                          {X86::VMOVDDUPZ128rmk, 1, 64, rebuildSplatCst}},
+                         128, 3);
+  case X86::VMOVAPDZ128rmkz:
+  case X86::VMOVUPDZ128rmkz:
+    return FixupConstant({{X86::VMOVSDZrmkz, 1, 64, rebuildZeroUpperCst},
+                          {X86::VMOVDDUPZ128rmkz, 1, 64, rebuildSplatCst}},
+                         128, 2);
+  case X86::VMOVAPSZ128rmk:
+  case X86::VMOVUPSZ128rmk:
+    return FixupConstant({{X86::VMOVSSZrmk, 1, 32, rebuildZeroUpperCst},
+                          {X86::VBROADCASTSSZ128rmk, 1, 32, rebuildSplatCst}},
+                         128, 3);
+  case X86::VMOVAPSZ128rmkz:
+  case X86::VMOVUPSZ128rmkz:
+    return FixupConstant({{X86::VMOVSSZrmkz, 1, 32, rebuildZeroUpperCst},
+                          {X86::VBROADCASTSSZ128rmkz, 1, 32, rebuildSplatCst}},
+                         128, 2);
   case X86::VMOVAPDZ256rm:
   case X86::VMOVAPSZ256rm:
   case X86::VMOVUPDZ256rm:
@@ -433,6 +453,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                           {X86::VBROADCASTSDZ256rm, 1, 64, rebuildSplatCst},
                           {X86::VBROADCASTF32X4Z256rm, 1, 128, rebuildSplatCst}},
                          256, 1);
+  case X86::VMOVAPDZ256rmk:
+  case X86::VMOVUPDZ256rmk:
+    return FixupConstant({{X86::VBROADCASTSDZ256rmk, 1, 64, rebuildSplatCst}},
+                         256, 3);
+  case X86::VMOVAPDZ256rmkz:
+  case X86::VMOVUPDZ256rmkz:
+    return FixupConstant({{X86::VBROADCASTSDZ256rmkz, 1, 64, rebuildSplatCst}},
+                         256, 2);
+  case X86::VMOVAPSZ256rmk:
+  case X86::VMOVUPSZ256rmk:
+    return FixupConstant(
+        {{X86::VBROADCASTSSZ256rmk, 1, 32, rebuildSplatCst},
+         {X86::VBROADCASTF32X4Z256rmk, 1, 128, rebuildSplatCst}},
+        256, 3);
+  case X86::VMOVAPSZ256rmkz:
+  case X86::VMOVUPSZ256rmkz:
+    return FixupConstant(
+        {{X86::VBROADCASTSSZ256rmkz, 1, 32, rebuildSplatCst},
+         {X86::VBROADCASTF32X4Z256rmkz, 1, 128, rebuildSplatCst}},
+        256, 2);
   case X86::VMOVAPDZrm:
   case X86::VMOVAPSZrm:
   case X86::VMOVUPDZrm:
@@ -442,6 +482,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                           {X86::VBROADCASTF32X4rm, 1, 128, rebuildSplatCst},
                           {X86::VBROADCASTF64X4rm, 1, 256, rebuildSplatCst}},
                          512, 1);
+  case X86::VMOVAPDZrmk:
+  case X86::VMOVUPDZrmk:
+    return FixupConstant({{X86::VBROADCASTSDZrmk, 1, 64, rebuildSplatCst},
+                          {X86::VBROADCASTF64X4rmk, 1, 256, rebuildSplatCst}},
+                         512, 3);
+  case X86::VMOVAPDZrmkz:
+  case X86::VMOVUPDZrmkz:
+    return FixupConstant({{X86::VBROADCASTSDZrmkz, 1, 64, rebuildSplatCst},
+                          {X86::VBROADCASTF64X4rmkz, 1, 256, rebuildSplatCst}},
+                         512, 2);
+  case X86::VMOVAPSZrmk:
+  case X86::VMOVUPSZrmk:
+    return FixupConstant({{X86::VBROADCASTSSZrmk, 1, 32, rebuildSplatCst},
+                          {X86::VBROADCASTF32X4rmk, 1, 128, rebuildSplatCst}},
+                         512, 3);
+  case X86::VMOVAPSZrmkz:
+  case X86::VMOVUPSZrmkz:
+    return FixupConstant({{X86::VBROADCASTSSZrmkz, 1, 32, rebuildSplatCst},
+                          {X86::VBROADCASTF32X4rmkz, 1, 128, rebuildSplatCst}},
+                         512, 2);
   /* Integer Loads */
   case X86::MOVDQArm:
   case X86::MOVDQUrm: {
@@ -537,6 +597,42 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                            {X86::VPMOVZXDQZ128rm, 2, 32, rebuildZExtCst}};
     return FixupConstant(Fixups, 128, 1);
   }
+  case X86::VMOVDQA32Z128rmk:
+  case X86::VMOVDQU32Z128rmk:
+    return FixupConstant({{X86::VPBROADCASTDZ128rmk, 1, 32, rebuildSplatCst},
+                          {X86::VPMOVSXBDZ128rmk, 4, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBDZ128rmk, 4, 8, rebuildZExtCst},
+                          {X86::VPMOVSXWDZ128rmk, 4, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWDZ128rmk, 4, 16, rebuildZExtCst}},
+                         128, 3);
+  case X86::VMOVDQA32Z128rmkz:
+  case X86::VMOVDQU32Z128rmkz:
+    return FixupConstant({{X86::VPBROADCASTDZ128rmkz, 1, 32, rebuildSplatCst},
+                          {X86::VPMOVSXBDZ128rmkz, 4, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBDZ128rmkz, 4, 8, rebuildZExtCst},
+                          {X86::VPMOVSXWDZ128rmkz, 4, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWDZ128rmkz, 4, 16, rebuildZExtCst}},
+                         128, 2);
+  case X86::VMOVDQA64Z128rmk:
+  case X86::VMOVDQU64Z128rmk:
+    return FixupConstant({{X86::VPMOVSXBQZ128rmk, 2, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBQZ128rmk, 2, 8, rebuildZExtCst},
+                          {X86::VPMOVSXWQZ128rmk, 2, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWQZ128rmk, 2, 16, rebuildZExtCst},
+                          {X86::VPBROADCASTQZ128rmk, 1, 64, rebuildSplatCst},
+                          {X86::VPMOVSXDQZ128rmk, 2, 32, rebuildSExtCst},
+                          {X86::VPMOVZXDQZ128rmk, 2, 32, rebuildZExtCst}},
+                         128, 3);
+  case X86::VMOVDQA64Z128rmkz:
+  case X86::VMOVDQU64Z128rmkz:
+    return FixupConstant({{X86::VPMOVSXBQZ128rmkz, 2, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBQZ128rmkz, 2, 8, rebuildZExtCst},
+                          {X86::VPMOVSXWQZ128rmkz, 2, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWQZ128rmkz, 2, 16, rebuildZExtCst},
+                          {X86::VPBROADCASTQZ128rmkz, 1, 64, rebuildSplatCst},
+                          {X86::VPMOVSXDQZ128rmkz, 2, 32, rebuildSExtCst},
+                          {X86::VPMOVZXDQZ128rmkz, 2, 32, rebuildZExtCst}},
+                         128, 2);
   case X86::VMOVDQA32Z256rm:
   case X86::VMOVDQA64Z256rm:
   case X86::VMOVDQU32Z256rm:
@@ -561,6 +657,46 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                            {X86::VPMOVZXDQZ256rm, 4, 32, rebuildZExtCst}};
     return FixupConstant(Fixups, 256, 1);
   }
+  case X86::VMOVDQA32Z256rmk:
+  case X86::VMOVDQU32Z256rmk:
+    return FixupConstant(
+        {{X86::VPBROADCASTDZ256rmk, 1, 32, rebuildSplatCst},
+         {X86::VPMOVSXBDZ256rmk, 8, 8, rebuildSExtCst},
+         {X86::VPMOVZXBDZ256rmk, 8, 8, rebuildZExtCst},
+         {X86::VBROADCASTI32X4Z256rmk, 1, 128, rebuildSplatCst},
+         {X86::VPMOVSXWDZ256rmk, 8, 16, rebuildSExtCst},
+         {X86::VPMOVZXWDZ256rmk, 8, 16, rebuildZExtCst}},
+        256, 3);
+  case X86::VMOVDQA32Z256rmkz:
+  case X86::VMOVDQU32Z256rmkz:
+    return FixupConstant(
+        {{X86::VPBROADCASTDZ256rmkz, 1, 32, rebuildSplatCst},
+         {X86::VPMOVSXBDZ256rmkz, 8, 8, rebuildSExtCst},
+         {X86::VPMOVZXBDZ256rmkz, 8, 8, rebuildZExtCst},
+         {X86::VBROADCASTI32X4Z256rmkz, 1, 128, rebuildSplatCst},
+         {X86::VPMOVSXWDZ256rmkz, 8, 16, rebuildSExtCst},
+         {X86::VPMOVZXWDZ256rmkz, 8, 16, rebuildZExtCst}},
+        256, 2);
+  case X86::VMOVDQA64Z256rmk:
+  case X86::VMOVDQU64Z256rmk:
+    return FixupConstant({{X86::VPMOVSXBQZ256rmk, 4, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBQZ256rmk, 4, 8, rebuildZExtCst},
+                          {X86::VPBROADCASTQZ256rmk, 1, 64, rebuildSplatCst},
+                          {X86::VPMOVSXWQZ256rmk, 4, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWQZ256rmk, 4, 16, rebuildZExtCst},
+                          {X86::VPMOVSXDQZ256rmk, 4, 32, rebuildSExtCst},
+                          {X86::VPMOVZXDQZ256rmk, 4, 32, rebuildZExtCst}},
+                         256, 3);
+  case X86::VMOVDQA64Z256rmkz:
+  case X86::VMOVDQU64Z256rmkz:
+    return FixupConstant({{X86::VPMOVSXBQZ256rmkz, 4, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBQZ256rmkz, 4, 8, rebuildZExtCst},
+                          {X86::VPBROADCASTQZ256rmkz, 1, 64, rebuildSplatCst},
+                          {X86::VPMOVSXWQZ256rmkz, 4, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWQZ256rmkz, 4, 16, rebuildZExtCst},
+                          {X86::VPMOVSXDQZ256rmkz, 4, 32, rebuildSExtCst},
+                          {X86::VPMOVZXDQZ256rmkz, 4, 32, rebuildZExtCst}},
+                         256, 2);
   case X86::VMOVDQA32Zrm:
   case X86::VMOVDQA64Zrm:
   case X86::VMOVDQU32Zrm:
@@ -586,43 +722,93 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                            {X86::VPMOVZXDQZrm, 8, 32, rebuildZExtCst}};
     return FixupConstant(Fixups, 512, 1);
   }
+  case X86::VMOVDQA32Zrmk:
+  case X86::VMOVDQU32Zrmk:
+    return FixupConstant({{X86::VPBROADCASTDZrmk, 1, 32, rebuildSplatCst},
+                          {X86::VBROADCASTI32X4rmk, 1, 128, rebuildSplatCst},
+                          {X86::VPMOVSXBDZrmk, 16, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBDZrmk, 16, 8, rebuildZExtCst},
+                          {X86::VPMOVSXWDZrmk, 16, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWDZrmk, 16, 16, rebuildZExtCst}},
+                         512, 3);
+  case X86::VMOVDQA32Zrmkz:
+  case X86::VMOVDQU32Zrmkz:
+    return FixupConstant({{X86::VPBROADCASTDZrmkz, 1, 32, rebuildSplatCst},
+                          {X86::VBROADCASTI32X4rmkz, 1, 128, rebuildSplatCst},
+                          {X86::VPMOVSXBDZrmkz, 16, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBDZrmkz, 16, 8, rebuildZExtCst},
+                          {X86::VPMOVSXWDZrmkz, 16, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWDZrmkz, 16, 16, rebuildZExtCst}},
+                         512, 2);
+  case X86::VMOVDQA64Zrmk:
+  case X86::VMOVDQU64Zrmk:
+    return FixupConstant({{X86::VPBROADCASTQZrmk, 1, 64, rebuildSplatCst},
+                          {X86::VPMOVSXBQZrmk, 8, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBQZrmk, 8, 8, rebuildZExtCst},
+                          {X86::VPMOVSXWQZrmk, 8, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWQZrmk, 8, 16, rebuildZExtCst},
+                          {X86::VBROADCASTI64X4rmk, 1, 256, rebuildSplatCst},
+                          {X86::VPMOVSXDQZrmk, 8, 32, rebuildSExtCst},
+                          {X86::VPMOVZXDQZrmk, 8, 32, rebuildZExtCst}},
+                         512, 3);
+  case X86::VMOVDQA64Zrmkz:
+  case X86::VMOVDQU64Zrmkz:
+    return FixupConstant({{X86::VPBROADCASTQZrmkz, 1, 64, rebuildSplatCst},
+                          {X86::VPMOVSXBQZrmkz, 8, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBQZrmkz, 8, 8, rebuildZExtCst},
+                          {X86::VPMOVSXWQZrmkz, 8, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWQZrmkz, 8, 16, rebuildZExtCst},
+                          {X86::VBROADCASTI64X4rmkz, 1, 256, rebuildSplatCst},
+                          {X86::VPMOVSXDQZrmkz, 8, 32, rebuildSExtCst},
+                          {X86::VPMOVZXDQZrmkz, 8, 32, rebuildZExtCst}},
+                         512, 2);
   }
 
-  auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc32, unsigned OpSrc64) {
-    unsigned OpBcst32 = 0, OpBcst64 = 0;
-    unsigned OpNoBcst32 = 0, OpNoBcst64 = 0;
+  auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc16, unsigned OpSrc32,
+                                      unsigned OpSrc64) {
+    if (OpSrc16) {
+      if (const X86FoldTableEntry *Mem2Bcst =
+              llvm::lookupBroadcastFoldTableBySize(OpSrc16, 16)) {
+        unsigned OpBcst16 = Mem2Bcst->DstOp;
+        unsigned OpNoBcst16 = Mem2Bcst->Flags & TB_INDEX_MASK;
+        FixupEntry Fixups[] = {{(int)OpBcst16, 1, 16, rebuildSplatCst}};
+        // TODO: Add support for RegBitWidth, but currently rebuildSplatCst
+        // doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
+        if (FixupConstant(Fixups, 0, OpNoBcst16))
+          return true;
+      }
+    }
     if (OpSrc32) {
       if (const X86FoldTableEntry *Mem2Bcst =
               llvm::lookupBroadcastFoldTableBySize(OpSrc32, 32)) {
-        OpBcst32 = Mem2Bcst->DstOp;
-        OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
+        unsigned OpBcst32 = Mem2Bcst->DstOp;
+        unsigned OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
+        FixupEntry Fixups[] = {{(int)OpBcst32, 1, 32, rebuildSplatCst}};
+        // TODO: Add support for RegBitWidth, but currently rebuildSplatCst
+        // doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
+        if (FixupConstant(Fixups, 0, OpNoBcst32))
+          return true;
       }
     }
     if (OpSrc64) {
      if (const X86FoldTableEntry *Mem2Bcst =
              llvm::lookupBroadcastFoldTableBySize(OpSrc64, 64)) {
-        OpBcst64 = Mem2Bcst->DstOp;
-        OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
+        unsigned OpBcst64 = Mem2Bcst->DstOp;
+        unsigned OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
+        FixupEntry Fixups[] = {{(int)OpBcst64, 1, 64, rebuildSplatCst}};
+        // TODO: Add support for RegBitWidth, but currently rebuildSplatCst
+        // doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
+        if (FixupConstant(Fixups, 0, OpNoBcst64))
+          return true;
      }
    }
-    assert(((OpBcst32 == 0) || (OpBcst64 == 0) || (OpNoBcst32 == OpNoBcst64)) &&
-           "OperandNo mismatch");
-
-    if (OpBcst32 || OpBcst64) {
-      unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
-      FixupEntry Fixups[] = {{(int)OpBcst32, 32, 32, rebuildSplatCst},
-                             {(int)OpBcst64, 64, 64, rebuildSplatCst}};
-      // TODO: Add support for RegBitWidth, but currently rebuildSplatCst
-      // doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
-      return FixupConstant(Fixups, 0, OpNo);
-    }
     return false;
   };
 
   // Attempt to find a AVX512 mapping from a full width memory-fold instruction
   // to a broadcast-fold instruction variant.
   if ((MI.getDesc().TSFlags & X86II::EncodingMask) == X86II::EVEX)
-    return ConvertToBroadcastAVX512(Opc, Opc);
+    return ConvertToBroadcastAVX512(Opc, Opc, Opc);
 
   // Reverse the X86InstrInfo::setExecutionDomainCustom EVEX->VEX logic
   // conversion to see if we can convert to a broadcasted (integer) logic op.
@@ -679,7 +865,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
       break;
     }
     if (OpSrc32 || OpSrc64)
-      return ConvertToBroadcastAVX512(OpSrc32, OpSrc64);
+      return ConvertToBroadcastAVX512(0, OpSrc32, OpSrc64);
   }
 
   return false;
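
Note on the trailing argument to FixupConstant in the new cases: it is the index of the instruction's first memory operand, and it follows a consistent pattern across the patch, plain rm loads pass 1, zero-masked rmkz variants pass 2, and merge-masked rmk variants pass 3. The sketch below illustrates that relationship under the assumed EVEX masked-load operand layout (destination, then a tied pass-through source for merge-masking only, then the mask register, then the memory operands); the enum and helper are hypothetical, for illustration, and are not part of the patch:

// Illustrative only: mirrors the operand indices passed to FixupConstant.
// Assumed MachineInstr operand layouts for EVEX vector loads:
//   rm   : dst, <mem operands>                 -> memory starts at index 1
//   rmkz : dst, mask, <mem operands>           -> index 2 (zero-masking)
//   rmk  : dst, passthru, mask, <mem operands> -> index 3 (merge-masking,
//                                                 passthru is tied to dst)
enum class Masking { None, Zeroing, Merging };

constexpr unsigned memOperandIndex(Masking M) {
  switch (M) {
  case Masking::None:
    return 1;
  case Masking::Zeroing:
    return 2;
  case Masking::Merging:
    return 3;
  }
  return 0; // not reached for valid enum values
}

static_assert(memOperandIndex(Masking::None) == 1, "rm variants");
static_assert(memOperandIndex(Masking::Zeroing) == 2, "rmkz variants");
static_assert(memOperandIndex(Masking::Merging) == 3, "rmk variants");

Relatedly, the reworked ConvertToBroadcastAVX512 lambda now tries each broadcast width independently and returns as soon as one fixup succeeds, rather than aggregating the 32-bit and 64-bit candidates into a single table; that restructuring is what makes room for the new 16-bit path.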