Skip to content

Commit dc0ea0f

Browse files
authored
[AMDGPU][True16][MC] true16 for v_cvt_pknorm_i16/u16_f16 (llvm#119605)
Support true16 format for v_cvt_pknorm_i16/u16_f16 in MC.
1 parent 8e8692a commit dc0ea0f

13 files changed

+696
-190
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -643,8 +643,8 @@ defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>;
643643
defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
644644
defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
645645

646-
defm V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
647-
defm V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
646+
defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
647+
defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>;
648648

649649
defm V_PACK_B32_F16 : VOP3Inst_t16 <"v_pack_b32_f16", VOP_B32_F16_F16>;
650650

@@ -1755,8 +1755,8 @@ defm V_MIN_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30
17551755
defm V_ADD_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30d, "v_add_nc_i16", "V_ADD_I16">;
17561756
defm V_SUB_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30e, "v_sub_nc_i16", "V_SUB_I16">;
17571757
defm V_PACK_B32_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x311, "v_pack_b32_f16">;
1758-
defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >;
1759-
defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >;
1758+
defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x312, "v_cvt_pk_norm_i16_f16", "V_CVT_PKNORM_I16_F16", "v_cvt_pknorm_i16_f16">;
1759+
defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x313, "v_cvt_pk_norm_u16_f16", "V_CVT_PKNORM_U16_F16", "v_cvt_pknorm_u16_f16">;
17601760
defm V_SUB_NC_I32 : VOP3_Realtriple_with_name_gfx11_gfx12<0x325, "V_SUB_I32", "v_sub_nc_i32">;
17611761
defm V_ADD_NC_I32 : VOP3_Realtriple_with_name_gfx11_gfx12<0x326, "V_ADD_I32", "v_add_nc_i32">;
17621762
defm V_ADD_F64 : VOP3_Real_Base_gfx11<0x327>;

llvm/test/MC/AMDGPU/gfx11_asm_vop3.s

Lines changed: 34 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1268,11 +1268,11 @@ v_cvt_pk_i16_i32 v5, src_scc, vcc_lo
12681268
v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi
12691269
// GFX11: v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
12701270

1271-
v_cvt_pk_norm_i16_f16 v5, v1, v2
1272-
// GFX11: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
1271+
v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l
1272+
// GFX11: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
12731273

1274-
v_cvt_pk_norm_i16_f16 v5, v255, v255
1275-
// GFX11: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
1274+
v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l
1275+
// GFX11: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
12761276

12771277
v_cvt_pk_norm_i16_f16 v5, s1, s2
12781278
// GFX11: v_cvt_pk_norm_i16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00]
@@ -1304,7 +1304,7 @@ v_cvt_pk_norm_i16_f16 v5, null, exec_lo
13041304
v_cvt_pk_norm_i16_f16 v5, -1, exec_hi
13051305
// GFX11: v_cvt_pk_norm_i16_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00]
13061306

1307-
v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0]
1307+
v_cvt_pk_norm_i16_f16 v5, 0.5, -m0
13081308
// GFX11: v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40]
13091309

13101310
v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
@@ -1313,11 +1313,23 @@ v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
13131313
v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
13141314
// GFX11: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
13151315

1316-
v_cvt_pk_norm_u16_f16 v5, v1, v2
1317-
// GFX11: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
1316+
v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l
1317+
// GFX11: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00]
13181318

1319-
v_cvt_pk_norm_u16_f16 v5, v255, v255
1320-
// GFX11: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
1319+
v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h
1320+
// GFX11: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00]
1321+
1322+
v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo|
1323+
// GFX11: v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x12,0xd7,0xfd,0xd4,0x00,0x20]
1324+
1325+
v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi|
1326+
// GFX11: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| ; encoding: [0xff,0x03,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
1327+
1328+
v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l
1329+
// GFX11: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
1330+
1331+
v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l
1332+
// GFX11: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
13211333

13221334
v_cvt_pk_norm_u16_f16 v5, s1, s2
13231335
// GFX11: v_cvt_pk_norm_u16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00]
@@ -1349,7 +1361,7 @@ v_cvt_pk_norm_u16_f16 v5, null, exec_lo
13491361
v_cvt_pk_norm_u16_f16 v5, -1, exec_hi
13501362
// GFX11: v_cvt_pk_norm_u16_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00]
13511363

1352-
v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0]
1364+
v_cvt_pk_norm_u16_f16 v5, 0.5, -m0
13531365
// GFX11: v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40]
13541366

13551367
v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
@@ -1358,6 +1370,18 @@ v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
13581370
v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
13591371
// GFX11: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
13601372

1373+
v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l
1374+
// GFX11: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00]
1375+
1376+
v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h
1377+
// GFX11: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00]
1378+
1379+
v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo|
1380+
// GFX11: v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x13,0xd7,0xfd,0xd4,0x00,0x20]
1381+
1382+
v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi|
1383+
// GFX11: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| ; encoding: [0xff,0x03,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
1384+
13611385
v_cvt_pk_u16_f32 v5, v1, v2
13621386
// GFX11: v_cvt_pk_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00]
13631387

0 commit comments

Comments
 (0)