Skip to content

Commit 0d1d66b

Browse files
committed
[AMDGPU][True16][MC] VOP3 profile in True16 format
1 parent 89dff56 commit 0d1d66b

File tree

8 files changed

+651
-336
lines changed

8 files changed

+651
-336
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5441,8 +5441,12 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
54415441
case AMDGPU::S_SUB_F16: return AMDGPU::V_SUB_F16_fake16_e64;
54425442
case AMDGPU::S_MIN_F16: return AMDGPU::V_MIN_F16_fake16_e64;
54435443
case AMDGPU::S_MAX_F16: return AMDGPU::V_MAX_F16_fake16_e64;
5444-
case AMDGPU::S_MINIMUM_F16: return AMDGPU::V_MINIMUM_F16_e64;
5445-
case AMDGPU::S_MAXIMUM_F16: return AMDGPU::V_MAXIMUM_F16_e64;
5444+
case AMDGPU::S_MINIMUM_F16:
5445+
return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
5446+
: AMDGPU::V_MINIMUM_F16_fake16_e64;
5447+
case AMDGPU::S_MAXIMUM_F16:
5448+
return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
5449+
: AMDGPU::V_MAXIMUM_F16_fake16_e64;
54465450
case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
54475451
case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
54485452
case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
@@ -7336,9 +7340,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
73367340
return;
73377341
}
73387342
case AMDGPU::S_MINIMUM_F32:
7339-
case AMDGPU::S_MAXIMUM_F32:
7340-
case AMDGPU::S_MINIMUM_F16:
7341-
case AMDGPU::S_MAXIMUM_F16: {
7343+
case AMDGPU::S_MAXIMUM_F32: {
73427344
const DebugLoc &DL = Inst.getDebugLoc();
73437345
Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
73447346
MachineInstr *NewInstr = BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst)
@@ -7349,7 +7351,28 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
73497351
.addImm(0) // clamp
73507352
.addImm(0); // omod
73517353
MRI.replaceRegWith(Inst.getOperand(0).getReg(), NewDst);
7352-
7354+
legalizeOperands(*NewInstr, MDT);
7355+
addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
7356+
Inst.eraseFromParent();
7357+
return;
7358+
}
7359+
case AMDGPU::S_MINIMUM_F16:
7360+
case AMDGPU::S_MAXIMUM_F16: {
7361+
const DebugLoc &DL = Inst.getDebugLoc();
7362+
Register NewDst;
7363+
if (ST.useRealTrue16Insts())
7364+
NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7365+
else
7366+
NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7367+
MachineInstr *NewInstr = BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst)
7368+
.addImm(0) // src0_modifiers
7369+
.add(Inst.getOperand(1))
7370+
.addImm(0) // src1_modifiers
7371+
.add(Inst.getOperand(2))
7372+
.addImm(0) // clamp
7373+
.addImm(0) // omod
7374+
.addImm(0); // opsel0
7375+
MRI.replaceRegWith(Inst.getOperand(0).getReg(), NewDst);
73537376
legalizeOperands(*NewInstr, MDT);
73547377
addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
73557378
Inst.eraseFromParent();

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2237,8 +2237,9 @@ class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
22372237
string clamp = !if(HasClamp, "$clamp", "");
22382238
string omod = !if(HasOMod, "$omod", "");
22392239

2240-
string ret = dst#!if(!gt(NumSrcArgs,0),", "#src0#src1#src2#opsel#bytesel#3PMods#clamp#omod, "");
2241-
2240+
string ret = dst#!if(!eq(NumSrcArgs,0),
2241+
"",
2242+
!if(HasDst,", ", "")#src0#src1#src2#opsel#bytesel#3PMods#clamp#omod);
22422243
}
22432244

22442245
class getAsmVOP3DPP<string base> {
@@ -2578,12 +2579,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
25782579
HasModifiers, DstVT, IsFP8ByteSel>.ret;
25792580
field string Asm64 = AsmVOP3Base;
25802581
field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret;
2581-
field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
2582-
HasClamp,
2583-
HasOMod,
2584-
HasSrc0FloatMods,
2585-
HasSrc1FloatMods,
2586-
HasSrc2FloatMods>.ret;
2582+
field string AsmVOP3OpSel = AsmVOP3Base;
25872583
field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3Base>.ret;
25882584
field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3Base>.ret;
25892585
field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3Base>.ret;
@@ -2735,6 +2731,7 @@ def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
27352731
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
27362732
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
27372733
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
2734+
def VOP_I32_I32_I32_I16 : VOPProfile <[i32, i32, i32, i16]>;
27382735
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
27392736
def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
27402737
def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2444,6 +2444,7 @@ def : AMDGPUPat <
24442444
$src1), sub1)
24452445
>;
24462446

2447+
let True16Predicate = NotHasTrue16BitInsts in {
24472448
def : ROTRPattern <V_ALIGNBIT_B32_e64>;
24482449

24492450
def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
@@ -2453,6 +2454,30 @@ def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
24532454
def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
24542455
(V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
24552456
(i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
2457+
} // end True16Predicate = NotHasTrue16BitInsts
2458+
2459+
let True16Predicate = UseFakeTrue16Insts in {
2460+
def ROTRPattern_fake16 : GCNPat <
2461+
(rotr i32:$src0, i32:$src1),
2462+
(V_ALIGNBIT_B32_fake16_e64 /* src0_modifiers */ 0, $src0,
2463+
/* src1_modifiers */ 0, $src0,
2464+
/* src2_modifiers */ 0,
2465+
$src1, /* clamp */ 0, /* op_sel */ 0)
2466+
>;
2467+
def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
2468+
(V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
2469+
(i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
2470+
0, /* src1_modifiers */
2471+
(i32 (EXTRACT_SUBREG (i64 $src0), sub0)),
2472+
0, /* src2_modifiers */
2473+
$src1, /* clamp */ 0, /* op_sel */ 0)>;
2474+
2475+
def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
2476+
(V_ALIGNBIT_B32_fake16_e64 /* src0_modifiers */ 0, $src0,
2477+
/* src1_modifiers */ 0, $src1,
2478+
/* src2_modifiers */ 0,
2479+
$src2, /* clamp */ 0, /* op_sel */ 0)>;
2480+
} // end True16Predicate = UseFakeTrue16Insts
24562481

24572482
/********** ====================== **********/
24582483
/********** Indirect addressing **********/
@@ -2932,6 +2957,7 @@ def : GCNPat <
29322957
(i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))
29332958
>;
29342959

2960+
let True16Predicate = NotHasTrue16BitInsts in
29352961
def : GCNPat <
29362962
(i32 (bswap i32:$a)),
29372963
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
@@ -2941,6 +2967,7 @@ def : GCNPat <
29412967

29422968
// FIXME: This should have been narrowed to i32 during legalization.
29432969
// This pattern should also be skipped for GlobalISel
2970+
let True16Predicate = NotHasTrue16BitInsts in
29442971
def : GCNPat <
29452972
(i64 (bswap i64:$a)),
29462973
(REG_SEQUENCE VReg_64,
@@ -3312,6 +3339,7 @@ def : GCNPat <
33123339

33133340
// Take the upper 16 bits from V[0] and the lower 16 bits from V[1]
33143341
// Special case, can use V_ALIGNBIT (always uses encoded literal)
3342+
let True16Predicate = NotHasTrue16BitInsts in
33153343
def : GCNPat <
33163344
(vecTy (DivergentBinFrag<build_vector>
33173345
(Ty !if(!eq(Ty, i16),
@@ -3321,6 +3349,16 @@ def : GCNPat <
33213349
(V_ALIGNBIT_B32_e64 VGPR_32:$b, VGPR_32:$a, (i32 16))
33223350
>;
33233351

3352+
let True16Predicate = UseFakeTrue16Insts in
3353+
def : GCNPat <
3354+
(vecTy (DivergentBinFrag<build_vector>
3355+
(Ty !if(!eq(Ty, i16),
3356+
(Ty (trunc (srl VGPR_32:$a, (i32 16)))),
3357+
(Ty (bitconvert (i16 (trunc (srl VGPR_32:$a, (i32 16)))))))),
3358+
(Ty VGPR_32:$b))),
3359+
(V_ALIGNBIT_B32_fake16_e64 0, VGPR_32:$b, 0, VGPR_32:$a, 0, (i16 16), 0, 0)
3360+
>;
3361+
33243362
// Take the upper 16 bits from each VGPR_32 and concat them
33253363
def : GCNPat <
33263364
(vecTy (DivergentBinFrag<build_vector>
@@ -3682,12 +3720,14 @@ defm : Int16Med3Pat<V_MED3_U16_e64, umin, umax>;
36823720
let OtherPredicates = [isGFX12Plus] in {
36833721
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
36843722
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
3685-
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
3686-
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
36873723
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
36883724
def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
3689-
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
3690-
def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
3725+
}
3726+
let True16Predicate = UseFakeTrue16Insts, OtherPredicates = [isGFX12Plus] in {
3727+
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_fake16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
3728+
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_fake16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
3729+
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F16_fake16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
3730+
def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F16_fake16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
36913731
}
36923732

36933733
// Convert a floating-point power of 2 to the integer exponent.

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1682,8 +1682,8 @@ multiclass VOP3Only_Realtriple_gfx11_gfx12<bits<10> op> :
16821682
VOP3Only_Realtriple<GFX11Gen, op>, VOP3Only_Realtriple<GFX12Gen, op>;
16831683

16841684
multiclass VOP3Only_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName> :
1685-
VOP3Only_Realtriple_t16<GFX11Gen, op, asmName>,
1686-
VOP3Only_Realtriple_t16<GFX12Gen, op, asmName>;
1685+
VOP3_Realtriple_t16_gfx11<op, asmName, NAME, "", 1>,
1686+
VOP3_Realtriple_t16_gfx12<op, asmName, NAME, "", 1>;
16871687

16881688
multiclass VOP3beOnly_Realtriple_gfx11_gfx12<bits<10> op> :
16891689
VOP3beOnly_Realtriple<GFX11Gen, op>, VOP3beOnly_Realtriple<GFX12Gen, op>;

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,8 @@ defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs
167167
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
168168
defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fminimum>>;
169169
defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fmaximum>>;
170-
defm V_MINIMUM_F16 : VOP3Inst <"v_minimum_f16", VOP3_Profile<VOP_F16_F16_F16>, DivergentBinFrag<fminimum>>;
171-
defm V_MAXIMUM_F16 : VOP3Inst <"v_maximum_f16", VOP3_Profile<VOP_F16_F16_F16>, DivergentBinFrag<fmaximum>>;
170+
defm V_MINIMUM_F16 : VOP3Inst_t16 <"v_minimum_f16", VOP_F16_F16_F16, DivergentBinFrag<fminimum>>;
171+
defm V_MAXIMUM_F16 : VOP3Inst_t16 <"v_maximum_f16", VOP_F16_F16_F16, DivergentBinFrag<fmaximum>>;
172172

173173
let SchedRW = [WriteDoubleAdd] in {
174174
defm V_MINIMUM_F64 : VOP3Inst <"v_minimum_f64", VOP3_Profile<VOP_F64_F64_F64>, fminimum>;
@@ -208,7 +208,11 @@ defm V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>,
208208
defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
209209
defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
210210
defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>;
211-
defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, fshr>;
211+
212+
defm V_ALIGNBIT_B32 : VOP3Inst_t16_with_profiles <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>,
213+
VOP3_Profile_True16<VOP_I32_I32_I32_I16, VOP3_OPSEL>, VOP3_Profile_Fake16<VOP_I32_I32_I32_I16, VOP3_OPSEL>,
214+
fshr, null_frag>;
215+
212216
defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;
213217

214218
// XXX - No FPException seems suspect but manual doesn't say it does
@@ -636,8 +640,8 @@ defm V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3
636640
defm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumax3>;
637641

638642
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
639-
defm V_MINIMUM3_F16 : VOP3Inst <"v_minimum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfminimum3>;
640-
defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmaximum3>;
643+
defm V_MINIMUM3_F16 : VOP3Inst_t16 <"v_minimum3_f16", VOP_F16_F16_F16_F16, AMDGPUfminimum3>;
644+
defm V_MAXIMUM3_F16 : VOP3Inst_t16 <"v_maximum3_f16", VOP_F16_F16_F16_F16, AMDGPUfmaximum3>;
641645
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
642646

643647
defm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
@@ -940,8 +944,8 @@ let SubtargetPredicate = isGFX11Plus in {
940944
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
941945
defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
942946
defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
943-
defm V_MAXIMUMMINIMUM_F16 : VOP3Inst<"v_maximumminimum_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
944-
defm V_MINIMUMMAXIMUM_F16 : VOP3Inst<"v_minimummaximum_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
947+
defm V_MAXIMUMMINIMUM_F16 : VOP3Inst_t16<"v_maximumminimum_f16", VOP_F16_F16_F16_F16>;
948+
defm V_MINIMUMMAXIMUM_F16 : VOP3Inst_t16<"v_minimummaximum_f16", VOP_F16_F16_F16_F16>;
945949
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
946950

947951
let OtherPredicates = [HasDot9Insts], IsDOT=1 in {
@@ -1046,8 +1050,8 @@ defm V_MIN3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x22b, "V_MIN3_F16",
10461050
defm V_MAX3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x22c, "V_MAX3_F16", "v_max3_num_f16">;
10471051
defm V_MINIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22d>;
10481052
defm V_MAXIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22e>;
1049-
defm V_MINIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x22f>;
1050-
defm V_MAXIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x230>;
1053+
defm V_MINIMUM3_F16 : VOP3_Realtriple_t16_and_f16_gfx12<0x22f, "v_minimum3_f16">;
1054+
defm V_MAXIMUM3_F16 : VOP3_Realtriple_t16_and_f16_gfx12<0x230, "v_maximum3_f16">;
10511055
defm V_MED3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x231, "V_MED3_F32", "v_med3_num_f32">;
10521056
defm V_MED3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x232, "V_MED3_F16", "v_med3_num_f16">;
10531057
defm V_MINMAX_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x268, "V_MINMAX_F32", "v_minmax_num_f32">;
@@ -1056,8 +1060,8 @@ defm V_MINMAX_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26a, "V_MINMAX_F16
10561060
defm V_MAXMIN_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26b, "V_MAXMIN_F16", "v_maxmin_num_f16">;
10571061
defm V_MINIMUMMAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26c>;
10581062
defm V_MAXIMUMMINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26d>;
1059-
defm V_MINIMUMMAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x26e>;
1060-
defm V_MAXIMUMMINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x26f>;
1063+
defm V_MINIMUMMAXIMUM_F16 : VOP3_Realtriple_t16_and_f16_gfx12<0x26e, "v_minimummaximum_f16">;
1064+
defm V_MAXIMUMMINIMUM_F16 : VOP3_Realtriple_t16_and_f16_gfx12<0x26f, "v_maximumminimum_f16">;
10611065
defm V_S_EXP_F32 : VOP3Only_Real_Base_gfx12<0x280>;
10621066
defm V_S_EXP_F16 : VOP3Only_Real_Base_gfx12<0x281>;
10631067
defm V_S_LOG_F32 : VOP3Only_Real_Base_gfx12<0x282>;
@@ -1074,8 +1078,8 @@ defm V_MINIMUM_F64 : VOP3Only_Real_Base_gfx12<0x341>;
10741078
defm V_MAXIMUM_F64 : VOP3Only_Real_Base_gfx12<0x342>;
10751079
defm V_MINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x365>;
10761080
defm V_MAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x366>;
1077-
defm V_MINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x367>;
1078-
defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x368>;
1081+
defm V_MINIMUM_F16 : VOP3_Realtriple_t16_and_f16_gfx12<0x367, "v_minimum_f16">;
1082+
defm V_MAXIMUM_F16 : VOP3_Realtriple_t16_and_f16_gfx12<0x368, "v_maximum_f16">;
10791083

10801084
defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>;
10811085
defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;
@@ -1108,6 +1112,17 @@ multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
11081112
multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op> :
11091113
VOP3Dot_Realtriple<GFX11Gen, op>, VOP3Dot_Realtriple<GFX12Gen, op>;
11101114

1115+
multiclass VOP3_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
1116+
string pseudo_mnemonic = "", bit isSingle = 0> :
1117+
VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName, pseudo_mnemonic, isSingle>,
1118+
VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
1119+
1120+
multiclass VOP3_Realtriple_t16_and_f16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
1121+
string pseudo_mnemonic = "", bit isSingle = 0> {
1122+
defm opName#"_t16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
1123+
defm opName#"_fake16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
1124+
}
1125+
11111126
multiclass VOP3be_Real_gfx11_gfx12<bits<10> op, string opName, string asmName> :
11121127
VOP3be_Real<GFX11Gen, op, opName, asmName>,
11131128
VOP3be_Real<GFX12Gen, op, opName, asmName>;
@@ -1128,7 +1143,7 @@ defm V_BFI_B32 : VOP3_Realtriple_gfx11_gfx12<0x212>;
11281143
defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>;
11291144
defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>;
11301145
defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>;
1131-
defm V_ALIGNBIT_B32 : VOP3_Realtriple_gfx11_gfx12<0x216>;
1146+
defm V_ALIGNBIT_B32 : VOP3_Realtriple_t16_and_f16_gfx11_gfx12<0x216, "v_alignbit_b32">;
11321147
defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11_gfx12<0x217>;
11331148
defm V_MULLIT_F32 : VOP3_Realtriple_gfx11_gfx12<0x218>;
11341149
defm V_MIN3_F32 : VOP3_Realtriple_gfx11<0x219>;

0 commit comments

Comments
 (0)