Skip to content

[AMDGPU][True16][MC] fix fmac_f16_t16 vop3 format #135464

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -624,7 +624,9 @@ bool isMAC(unsigned Opc) {
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this function doing anything more than checking whether the instruction has a tied operand? This should be moved to something on the instruction definition.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I agree. I think it only takes care of the tied operand.

Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
Expand Down
23 changes: 16 additions & 7 deletions llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --sort --version 5
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s

Expand Down Expand Up @@ -56,14 +56,23 @@ v_add_f16 v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
v_add_f16 v5.l, v255.l, v2.l quad_perm:[3,2,1,0]
// GFX11: v_add_f16_e64_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]

v_fmac_f16 v255, v1, v2
// GFX11: v_fmac_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x36,0xd5,0x01,0x05,0x02,0x00]
v_fmac_f16 v255.h, v1.h, v2.h
// GFX11: v_fmac_f16_e64 v255.h, v1.h, v2.h op_sel:[1,1,1,1] ; encoding: [0xff,0x58,0x36,0xd5,0x01,0x05,0x02,0x00]

v_fmac_f16 v5, v1, v255
// GFX11: v_fmac_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0xff,0x03,0x00]
v_fmac_f16 v255.l, v1.l, v2.l
// GFX11: v_fmac_f16_e64 v255.l, v1.l, v2.l ; encoding: [0xff,0x00,0x36,0xd5,0x01,0x05,0x02,0x00]

v_fmac_f16 v5, v255, v2
// GFX11: v_fmac_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x36,0xd5,0xff,0x05,0x02,0x00]
v_fmac_f16 v5.h, v1.h, v255.h
// GFX11: v_fmac_f16_e64 v5.h, v1.h, v255.h op_sel:[1,1,1,1] ; encoding: [0x05,0x58,0x36,0xd5,0x01,0xff,0x03,0x00]

v_fmac_f16 v5.h, v255.h, v2.h
// GFX11: v_fmac_f16_e64 v5.h, v255.h, v2.h op_sel:[1,1,1,1] ; encoding: [0x05,0x58,0x36,0xd5,0xff,0x05,0x02,0x00]

v_fmac_f16 v5.l, v1.l, v255.l
// GFX11: v_fmac_f16_e64 v5.l, v1.l, v255.l ; encoding: [0x05,0x00,0x36,0xd5,0x01,0xff,0x03,0x00]

v_fmac_f16 v5.l, v255.l, v2.l
// GFX11: v_fmac_f16_e64 v5.l, v255.l, v2.l ; encoding: [0x05,0x00,0x36,0xd5,0xff,0x05,0x02,0x00]

v_ldexp_f16 v255.h, v1.h, v2.h
// GFX11: v_ldexp_f16_e64 v255.h, v1.h, v2.h op_sel:[1,1,1] ; encoding: [0xff,0x58,0x3b,0xd5,0x01,0x05,0x02,0x00]
Expand Down
68 changes: 40 additions & 28 deletions llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s
Original file line number Diff line number Diff line change
Expand Up @@ -524,47 +524,59 @@ v_cvt_pkrtz_f16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3
v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]

v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]

v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]

v_fmac_f16_e64_dpp v5, v1, v2 row_mirror
// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_mirror
// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]

v_fmac_f16_e64_dpp v5, v1, v2 row_half_mirror
// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror
// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]

v_fmac_f16_e64_dpp v5, v1, v2 row_shl:1
// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1
// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]

v_fmac_f16_e64_dpp v5, v1, v2 row_shl:15
// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15
// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]

v_fmac_f16_e64_dpp v5, v1, v2 row_shr:1
// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1
// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]

v_fmac_f16_e64_dpp v5, v1, v2 row_shr:15
// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15
// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]

v_fmac_f16_e64_dpp v5, v1, v2 row_ror:1
// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1
// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]

v_fmac_f16_e64_dpp v5, v1, v2 row_ror:15
// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15
// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]

v_fmac_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]

v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
// GFX11: v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01]
v_fmac_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
// GFX11: v_fmac_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01]

v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
// GFX11: v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13]
v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
// GFX11: v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13]

v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30]
v_fmac_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_fmac_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30]

v_fmac_f16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
// GFX11: v_fmac_f16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]

v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
// GFX11: v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01]

v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
// GFX11: v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x12,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13]

v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| op_sel:[0,0,1,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc3,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30]

v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
// GFX11: v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
Expand Down
28 changes: 20 additions & 8 deletions llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s
Original file line number Diff line number Diff line change
Expand Up @@ -169,17 +169,29 @@ v_cvt_pkrtz_f16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x2f,0xd5,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]

v_fmac_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
v_fmac_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]

v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05]
v_fmac_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_fmac_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05]

v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX11: v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x36,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05]
v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX11: v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x36,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05]

v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x36,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00]
v_fmac_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: v_fmac_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x36,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00]

v_fmac_f16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_fmac_f16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]

v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05]

v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX11: v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x12,0x36,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05]

v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| op_sel:[0,0,1,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc3,0x36,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00]

v_fmac_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_fmac_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
Expand Down
69 changes: 39 additions & 30 deletions llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s
Original file line number Diff line number Diff line change
Expand Up @@ -587,50 +587,59 @@ v_fmac_dx9_zero_f32_e64 v5, -src_scc, |vcc_lo| mul:4
v_fmac_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2
// GFX11: v_fmac_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x06,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf]

v_fmac_f16_e64 v5, v1, v2
// GFX11: v_fmac_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00]
v_fmac_f16_e64 v5.l, v1.l, v2.l
// GFX11: v_fmac_f16_e64 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00]

v_fmac_f16_e64 v5, v255, v255
// GFX11: v_fmac_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00]
v_fmac_f16_e64 v5.l, v255.l, v255.l
// GFX11: v_fmac_f16_e64 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00]

v_fmac_f16_e64 v5, s1, s2
// GFX11: v_fmac_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00]
v_fmac_f16_e64 v5.l, s1, s2
// GFX11: v_fmac_f16_e64 v5.l, s1, s2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00]

v_fmac_f16_e64 v5, s105, s105
// GFX11: v_fmac_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00]
v_fmac_f16_e64 v5.l, s105, s105
// GFX11: v_fmac_f16_e64 v5.l, s105, s105 ; encoding: [0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00]

v_fmac_f16_e64 v5, vcc_lo, ttmp15
// GFX11: v_fmac_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00]
v_fmac_f16_e64 v5.l, vcc_lo, ttmp15
// GFX11: v_fmac_f16_e64 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00]

v_fmac_f16_e64 v5, vcc_hi, 0xfe0b
// GFX11: v_fmac_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
v_fmac_f16_e64 v5.l, vcc_hi, 0xfe0b
// GFX11: v_fmac_f16_e64 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]

v_fmac_f16_e64 v5, ttmp15, src_scc
// GFX11: v_fmac_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00]
v_fmac_f16_e64 v5.l, ttmp15, src_scc
// GFX11: v_fmac_f16_e64 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00]

v_fmac_f16_e64 v5, m0, 0.5
// GFX11: v_fmac_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00]
v_fmac_f16_e64 v5.l, m0, 0.5
// GFX11: v_fmac_f16_e64 v5.l, m0, 0.5 ; encoding: [0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00]

v_fmac_f16_e64 v5, exec_lo, -1
// GFX11: v_fmac_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00]
v_fmac_f16_e64 v5.l, exec_lo, -1
// GFX11: v_fmac_f16_e64 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00]

v_fmac_f16_e64 v5, |exec_hi|, null
// GFX11: v_fmac_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00]
v_fmac_f16_e64 v5.l, |exec_hi|, null
// GFX11: v_fmac_f16_e64 v5.l, |exec_hi|, null ; encoding: [0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00]

v_fmac_f16_e64 v5, null, exec_lo
// GFX11: v_fmac_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00]
v_fmac_f16_e64 v5.l, null, exec_lo
// GFX11: v_fmac_f16_e64 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00]

v_fmac_f16_e64 v5, -1, exec_hi
// GFX11: v_fmac_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00]
v_fmac_f16_e64 v5.l, -1, exec_hi
// GFX11: v_fmac_f16_e64 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00]

v_fmac_f16_e64 v5, 0.5, -m0 mul:2
// GFX11: v_fmac_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48]
v_fmac_f16_e64 v5.l, 0.5, -m0 mul:2
// GFX11: v_fmac_f16_e64 v5.l, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48]

v_fmac_f16_e64 v5, -src_scc, |vcc_lo| mul:4
// GFX11: v_fmac_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30]
v_fmac_f16_e64 v5.l, -src_scc, |vcc_lo| mul:4
// GFX11: v_fmac_f16_e64 v5.l, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30]

v_fmac_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2
// GFX11: v_fmac_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00]
v_fmac_f16_e64 v255.l, -|0xfe0b|, -|vcc_hi| clamp div:2
// GFX11: v_fmac_f16_e64 v255.l, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00]

v_fmac_f16_e64 v5.l, v1.h, v2.l
// GFX11: v_fmac_f16_e64 v5.l, v1.h, v2.l op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x36,0xd5,0x01,0x05,0x02,0x00]

v_fmac_f16_e64 v5.l, v255.l, v255.h
// GFX11: v_fmac_f16_e64 v5.l, v255.l, v255.h op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x36,0xd5,0xff,0xff,0x03,0x00]

v_fmac_f16_e64 v255.h, -|0xfe0b|, -|vcc_hi| clamp div:2
// GFX11: v_fmac_f16_e64 v255.h, -|0xfe0b|, -|vcc_hi| op_sel:[0,0,1,1] clamp div:2 ; encoding: [0xff,0xc3,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00]

v_fmac_f32_e64 v5, v1, v2
// GFX11: v_fmac_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x2b,0xd5,0x01,0x05,0x02,0x00]
Expand Down
Loading