@@ -50,6 +50,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
50
50
return MRI.getType (Reg) == LLT::scalar (32 );
51
51
case S64:
52
52
return MRI.getType (Reg) == LLT::scalar (64 );
53
+ case P0:
54
+ return MRI.getType (Reg) == LLT::pointer (0 , 64 );
53
55
case P1:
54
56
return MRI.getType (Reg) == LLT::pointer (1 , 64 );
55
57
case P3:
@@ -58,6 +60,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
58
60
return MRI.getType (Reg) == LLT::pointer (4 , 64 );
59
61
case P5:
60
62
return MRI.getType (Reg) == LLT::pointer (5 , 32 );
63
+ case V4S32:
64
+ return MRI.getType (Reg) == LLT::fixed_vector (4 , 32 );
61
65
case B32:
62
66
return MRI.getType (Reg).getSizeInBits () == 32 ;
63
67
case B64:
@@ -78,6 +82,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
78
82
return MRI.getType (Reg) == LLT::scalar (32 ) && MUI.isUniform (Reg);
79
83
case UniS64:
80
84
return MRI.getType (Reg) == LLT::scalar (64 ) && MUI.isUniform (Reg);
85
+ case UniP0:
86
+ return MRI.getType (Reg) == LLT::pointer (0 , 64 ) && MUI.isUniform (Reg);
81
87
case UniP1:
82
88
return MRI.getType (Reg) == LLT::pointer (1 , 64 ) && MUI.isUniform (Reg);
83
89
case UniP3:
@@ -104,6 +110,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
104
110
return MRI.getType (Reg) == LLT::scalar (32 ) && MUI.isDivergent (Reg);
105
111
case DivS64:
106
112
return MRI.getType (Reg) == LLT::scalar (64 ) && MUI.isDivergent (Reg);
113
+ case DivP0:
114
+ return MRI.getType (Reg) == LLT::pointer (0 , 64 ) && MUI.isDivergent (Reg);
107
115
case DivP1:
108
116
return MRI.getType (Reg) == LLT::pointer (1 , 64 ) && MUI.isDivergent (Reg);
109
117
case DivP3:
@@ -315,13 +323,15 @@ RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
315
323
Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
316
324
unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID ();
317
325
if (!IRulesAlias.contains (IntrID)) {
  // No rule set is registered for this intrinsic ID. Print the instruction
  // through LLVM_DEBUG only, instead of an additional unconditional
  // MI.dump() call, then abort via llvm_unreachable.
  LLVM_DEBUG (dbgs () << " MI: " ; MI.dump (););
  llvm_unreachable (" No rules defined for intrinsic opcode" );
}
321
330
return IRules.at (IRulesAlias.at (IntrID));
322
331
}
323
332
324
333
if (!GRulesAlias.contains (Opc)) {
  // No rule set is registered for this generic opcode. Print the instruction
  // through LLVM_DEBUG only, instead of an additional unconditional
  // MI.dump() call, then abort via llvm_unreachable.
  LLVM_DEBUG (dbgs () << " MI: " ; MI.dump (););
  llvm_unreachable (" No rules defined for generic opcode" );
}
@@ -431,16 +441,21 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
431
441
addRulesForGOpcs ({G_XOR, G_OR, G_AND}, StandardB)
432
442
.Any ({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
433
443
.Any ({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
444
+ .Div (B32, {{VgprB32}, {VgprB32, VgprB32}})
445
+ .Uni (B64, {{SgprB64}, {SgprB64, SgprB64}})
434
446
.Div (B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});
435
447
436
448
addRulesForGOpcs ({G_SHL}, Standard)
449
+ .Div (S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
437
450
.Uni (S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
438
451
.Div (S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
439
452
440
453
// Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
441
454
// and G_FREEZE here, rest is trivially regbankselected earlier
455
+ addRulesForGOpcs ({G_IMPLICIT_DEF}).Any ({{UniS1}, {{Sgpr32Trunc}, {}}});
442
456
addRulesForGOpcs ({G_CONSTANT})
443
457
.Any ({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});
458
+ addRulesForGOpcs ({G_FREEZE}).Any ({{DivS1}, {{Vcc}, {Vcc}}});
444
459
445
460
addRulesForGOpcs ({G_ICMP})
446
461
.Any ({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
@@ -471,6 +486,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
471
486
472
487
addRulesForGOpcs ({G_ZEXT, G_SEXT})
473
488
.Any ({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
489
+ .Any ({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
474
490
.Any ({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
475
491
.Any ({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}});
476
492
@@ -525,9 +541,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
525
541
526
542
// clang-format off
527
543
addRulesForGOpcs ({G_LOAD})
544
+ .Any ({{DivB32, DivP0}, {{VgprB32}, {VgprP0}}})
545
+
528
546
.Any ({{DivB32, DivP1}, {{VgprB32}, {VgprP1}}})
529
547
.Any ({{{UniB256, UniP1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
530
548
.Any ({{{UniB512, UniP1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})
549
+ .Any ({{{UniB32, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}})
531
550
.Any ({{{UniB256, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {VgprP1}, SplitLoad}})
532
551
.Any ({{{UniB512, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {VgprP1}, SplitLoad}})
533
552
@@ -556,15 +575,26 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
556
575
// clang-format on
557
576
558
577
addRulesForGOpcs ({G_AMDGPU_BUFFER_LOAD}, Vector)
578
+ .Div (S32, {{Vgpr32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
579
+ .Uni (S32, {{UniInVgprS32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
559
580
.Div (V4S32, {{VgprV4S32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
560
581
.Uni (V4S32, {{UniInVgprV4S32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}});
561
582
562
583
addRulesForGOpcs ({G_STORE})
584
+ .Any ({{S32, P0}, {{}, {Vgpr32, VgprP0}}})
563
585
.Any ({{S32, P1}, {{}, {Vgpr32, VgprP1}}})
564
586
.Any ({{S64, P1}, {{}, {Vgpr64, VgprP1}}})
565
587
.Any ({{V4S32, P1}, {{}, {VgprV4S32, VgprP1}}});
566
588
567
- addRulesForGOpcs ({G_PTR_ADD}).Any ({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}});
589
+ addRulesForGOpcs ({G_AMDGPU_BUFFER_STORE})
590
+ .Any ({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});
591
+
592
+ addRulesForGOpcs ({G_PTR_ADD})
593
+ .Any ({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}})
594
+ .Any ({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}})
595
+ .Any ({{DivP0}, {{VgprP0}, {VgprP0, Vgpr64}}});
596
+
597
+ addRulesForGOpcs ({G_INTTOPTR}).Any ({{UniP4}, {{SgprP4}, {Sgpr64}}});
568
598
569
599
addRulesForGOpcs ({G_ABS}, Standard).Uni (S16, {{Sgpr32Trunc}, {Sgpr32SExt}});
570
600
@@ -580,15 +610,24 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
580
610
.Any ({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
581
611
582
612
addRulesForGOpcs ({G_UITOFP})
613
+ .Any ({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
583
614
.Any ({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
584
615
.Any ({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
585
616
586
617
using namespace Intrinsic ;
587
618
619
+ addRulesForIOpcs ({amdgcn_s_getpc}).Any ({{UniS64, _}, {{Sgpr64}, {None}}});
620
+
588
621
// This is "intrinsic lane mask" it was set to i32/i64 in llvm-ir.
589
622
addRulesForIOpcs ({amdgcn_end_cf}).Any ({{_, S32}, {{}, {None, Sgpr32}}});
590
623
591
624
addRulesForIOpcs ({amdgcn_if_break}, Standard)
592
625
.Uni (S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});
593
626
627
+ addRulesForIOpcs ({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
628
+ .Div (S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
629
+
630
+ addRulesForIOpcs ({amdgcn_readfirstlane})
631
+ .Any ({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}});
632
+
594
633
} // end initialize rules
0 commit comments