@@ -167,8 +167,8 @@ defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs
167
167
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
168
168
defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fminimum>>;
169
169
defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fmaximum>>;
170
- defm V_MINIMUM_F16 : VOP3Inst <"v_minimum_f16", VOP3_Profile< VOP_F16_F16_F16> , DivergentBinFrag<fminimum>>;
171
- defm V_MAXIMUM_F16 : VOP3Inst <"v_maximum_f16", VOP3_Profile< VOP_F16_F16_F16> , DivergentBinFrag<fmaximum>>;
170
+ defm V_MINIMUM_F16 : VOP3Inst_t16 <"v_minimum_f16", VOP_F16_F16_F16, DivergentBinFrag<fminimum>>;
171
+ defm V_MAXIMUM_F16 : VOP3Inst_t16 <"v_maximum_f16", VOP_F16_F16_F16, DivergentBinFrag<fmaximum>>;
172
172
173
173
let SchedRW = [WriteDoubleAdd] in {
174
174
defm V_MINIMUM_F64 : VOP3Inst <"v_minimum_f64", VOP3_Profile<VOP_F64_F64_F64>, fminimum>;
@@ -208,7 +208,11 @@ defm V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>,
208
208
defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
209
209
defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
210
210
defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>;
211
- defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, fshr>;
211
+
212
+ defm V_ALIGNBIT_B32 : VOP3Inst_t16_with_profiles <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>,
213
+ VOP3_Profile_True16<VOP_I32_I32_I32_I16, VOP3_OPSEL>, VOP3_Profile_Fake16<VOP_I32_I32_I32_I16, VOP3_OPSEL>,
214
+ fshr, null_frag>;
215
+
212
216
defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;
213
217
214
218
// XXX - No FPException seems suspect but manual doesn't say it does
@@ -636,8 +640,8 @@ defm V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3
636
640
defm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumax3>;
637
641
638
642
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in { // both fields are set on every defm up to the closing brace
639
- defm V_MINIMUM3_F16 : VOP3Inst <"v_minimum3_f16", VOP3_Profile< VOP_F16_F16_F16_F16, VOP3_OPSEL> , AMDGPUfminimum3>;
640
- defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile< VOP_F16_F16_F16_F16, VOP3_OPSEL> , AMDGPUfmaximum3>;
643
+ defm V_MINIMUM3_F16 : VOP3Inst_t16 <"v_minimum3_f16", VOP_F16_F16_F16_F16, AMDGPUfminimum3>; // passes the bare VOP_F16_F16_F16_F16; no VOP3_Profile<..., VOP3_OPSEL> wrapper is written at this site
644
+ defm V_MAXIMUM3_F16 : VOP3Inst_t16 <"v_maximum3_f16", VOP_F16_F16_F16_F16, AMDGPUfmaximum3>; // same shape as V_MINIMUM3_F16, with AMDGPUfmaximum3 as the third argument
641
645
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
642
646
643
647
defm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
@@ -940,8 +944,8 @@ let SubtargetPredicate = isGFX11Plus in {
940
944
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in { // both fields are set on every defm up to the closing brace
941
945
defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
942
946
defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
943
- defm V_MAXIMUMMINIMUM_F16 : VOP3Inst <"v_maximumminimum_f16", VOP3_Profile< VOP_F16_F16_F16_F16, VOP3_OPSEL> >;
944
- defm V_MINIMUMMAXIMUM_F16 : VOP3Inst <"v_minimummaximum_f16", VOP3_Profile< VOP_F16_F16_F16_F16, VOP3_OPSEL> >;
947
+ defm V_MAXIMUMMINIMUM_F16 : VOP3Inst_t16 <"v_maximumminimum_f16", VOP_F16_F16_F16_F16>; // only name and profile are passed; any further VOP3Inst_t16 arguments take their defaults
948
+ defm V_MINIMUMMAXIMUM_F16 : VOP3Inst_t16 <"v_minimummaximum_f16", VOP_F16_F16_F16_F16>; // mirrors V_MAXIMUMMINIMUM_F16 with the other mnemonic
945
949
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
946
950
947
951
let OtherPredicates = [HasDot9Insts], IsDOT=1 in {
@@ -1046,8 +1050,8 @@ defm V_MIN3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x22b, "V_MIN3_F16",
1046
1050
defm V_MAX3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x22c, "V_MAX3_F16", "v_max3_num_f16">;
1047
1051
defm V_MINIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22d>;
1048
1052
defm V_MAXIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22e>;
1049
- defm V_MINIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12 <0x22f>;
1050
- defm V_MAXIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12 <0x230>;
1053
+ defm V_MINIMUM3_F16 : VOP3_Realtriple_t16_and_f16_gfx12 <0x22f, "v_minimum3_f16" >;
1054
+ defm V_MAXIMUM3_F16 : VOP3_Realtriple_t16_and_f16_gfx12 <0x230, "v_maximum3_f16" >;
1051
1055
defm V_MED3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x231, "V_MED3_F32", "v_med3_num_f32">;
1052
1056
defm V_MED3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x232, "V_MED3_F16", "v_med3_num_f16">;
1053
1057
defm V_MINMAX_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x268, "V_MINMAX_F32", "v_minmax_num_f32">;
@@ -1056,8 +1060,8 @@ defm V_MINMAX_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26a, "V_MINMAX_F16
1056
1060
defm V_MAXMIN_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26b, "V_MAXMIN_F16", "v_maxmin_num_f16">;
1057
1061
defm V_MINIMUMMAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26c>;
1058
1062
defm V_MAXIMUMMINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26d>;
1059
- defm V_MINIMUMMAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12 <0x26e>;
1060
- defm V_MAXIMUMMINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12 <0x26f>;
1063
+ defm V_MINIMUMMAXIMUM_F16 : VOP3_Realtriple_t16_and_f16_gfx12 <0x26e, "v_minimummaximum_f16" >;
1064
+ defm V_MAXIMUMMINIMUM_F16 : VOP3_Realtriple_t16_and_f16_gfx12 <0x26f, "v_maximumminimum_f16" >;
1061
1065
defm V_S_EXP_F32 : VOP3Only_Real_Base_gfx12<0x280>;
1062
1066
defm V_S_EXP_F16 : VOP3Only_Real_Base_gfx12<0x281>;
1063
1067
defm V_S_LOG_F32 : VOP3Only_Real_Base_gfx12<0x282>;
@@ -1074,8 +1078,8 @@ defm V_MINIMUM_F64 : VOP3Only_Real_Base_gfx12<0x341>;
1074
1078
defm V_MAXIMUM_F64 : VOP3Only_Real_Base_gfx12<0x342>;
1075
1079
defm V_MINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x365>;
1076
1080
defm V_MAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x366>;
1077
- defm V_MINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12 <0x367>;
1078
- defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12 <0x368>;
1081
+ defm V_MINIMUM_F16 : VOP3_Realtriple_t16_and_f16_gfx12 <0x367, "v_minimum_f16" >;
1082
+ defm V_MAXIMUM_F16 : VOP3_Realtriple_t16_and_f16_gfx12 <0x368, "v_maximum_f16" >;
1079
1083
1080
1084
defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>;
1081
1085
defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;
@@ -1108,6 +1112,17 @@ multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
1108
1112
multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op> :
1109
1113
VOP3Dot_Realtriple<GFX11Gen, op>, VOP3Dot_Realtriple<GFX12Gen, op>;
1110
1114
1115
+ multiclass VOP3_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME, // asmName is required; opName falls back to the defm's NAME
1116
+ string pseudo_mnemonic = "", bit isSingle = 0> : // no body of its own: all records come from the two parent multiclasses below
1117
+ VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName, pseudo_mnemonic, isSingle>, // GFX11Gen instantiation
1118
+ VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>; // GFX12Gen instantiation with the identical op/opName/asmName/pseudo_mnemonic/isSingle
1119
+
1120
+ multiclass VOP3_Realtriple_t16_and_f16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME, // NOTE(review): the name says "f16", but the second variant defined below is suffixed "_fake16" - confirm the naming is intended
1121
+ string pseudo_mnemonic = "", bit isSingle = 0> { // op, asmName, pseudo_mnemonic and isSingle are forwarded unchanged to both defms
1122
+ defm opName#"_t16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>; // first variant: passes opName#"_t16" as opName
1123
+ defm opName#"_fake16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>; // second variant: passes opName#"_fake16" as opName, with the same op and asmName
1124
+ } // End multiclass VOP3_Realtriple_t16_and_f16_gfx11_gfx12
1125
+
1111
1126
multiclass VOP3be_Real_gfx11_gfx12<bits<10> op, string opName, string asmName> :
1112
1127
VOP3be_Real<GFX11Gen, op, opName, asmName>,
1113
1128
VOP3be_Real<GFX12Gen, op, opName, asmName>;
@@ -1128,7 +1143,7 @@ defm V_BFI_B32 : VOP3_Realtriple_gfx11_gfx12<0x212>;
1128
1143
defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>;
1129
1144
defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>;
1130
1145
defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>;
1131
- defm V_ALIGNBIT_B32 : VOP3_Realtriple_gfx11_gfx12 <0x216>;
1146
+ defm V_ALIGNBIT_B32 : VOP3_Realtriple_t16_and_f16_gfx11_gfx12 <0x216, "v_alignbit_b32" >;
1132
1147
defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11_gfx12<0x217>;
1133
1148
defm V_MULLIT_F32 : VOP3_Realtriple_gfx11_gfx12<0x218>;
1134
1149
defm V_MIN3_F32 : VOP3_Realtriple_gfx11<0x219>;
0 commit comments