-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AMDGPU][True16][MC] update a few mc test for true16 #135816
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU][True16][MC] update a few mc test for true16 #135816
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Brox Chen (broxigarchen) ChangesThis is another NFC patch. Update mc test for a few true16 instructions by duplicating the file to fake16 versions and udpate Patch is 46.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/135816.diff 8 Files Affected:
diff --git a/llvm/test/MC/AMDGPU/bf16_imm-fake16.s b/llvm/test/MC/AMDGPU/bf16_imm-fake16.s
new file mode 100644
index 0000000000000..ee697bee6ab2d
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/bf16_imm-fake16.s
@@ -0,0 +1,114 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s | FileCheck %s
+
+v_dot2_bf16_bf16 v5, v1, v2, 100.0
+// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 0x42c8 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0xc8,0x42,0x00,0x00]
+
+v_dot2_bf16_bf16 v2, v0, 1.0, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, 1.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe5,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, 1.0, v0, v2
+// CHECK: v_dot2_bf16_bf16 v2, 1.0, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xf2,0x00,0x0a,0x04]
+
+v_dot2_bf16_bf16 v5, v1, v2, 1.0
+// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 1.0 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xca,0x03]
+
+v_dot2_bf16_bf16 v2, v0, -1.0, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, -1.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe7,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, v0, 0.5, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, 0.5, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe1,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, v0, -0.5, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, -0.5, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe3,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, v0, 2.0, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, 2.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe9,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, v0, -2.0, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, -2.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xeb,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, v0, 4.0, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, 4.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xed,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, v0, -4.0, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, -4.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xef,0x09,0x04]
+
+// Check 1/(2*pi) rounded value and ideomatic fp32 0.15915494 value
+// which cannot be accurately represented in bf16.
+
+v_dot2_bf16_bf16 v2, v0, 0.158203125, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, v0, 0.15915494, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, v0, 0x3e22, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, v0, v2, 0.15915494
+// CHECK: v_dot2_bf16_bf16 v2, v0, v2, 0.15915494 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0x05,0xe2,0x03]
+
+v_dot2_f32_bf16 v2, v1, 0, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, 0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0x01,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, 0.5, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, 0.5, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe1,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, -0.5, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, -0.5, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe3,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, 1.0, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, 1.0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe5,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, -1.0, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, -1.0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe7,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, 2.0, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, 2.0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe9,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, -2.0, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, -2.0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xeb,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, 4.0, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, 4.0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xed,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, -4.0, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, -4.0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xef,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, 0.15915494, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, 0.15915494, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xf1,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, 0x3e22, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, 0.15915494, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xf1,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, 0.5, v1, v2
+// CHECK: v_dot2_f32_bf16 v2, 0.5, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf0,0x02,0x0a,0x1c]
+
+v_dot2_f32_bf16 v2, -0.5, v1, v2
+// CHECK: v_dot2_f32_bf16 v2, -0.5, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf1,0x02,0x0a,0x1c]
+
+v_dot2_f32_bf16 v2, 1.0, v1, v2
+// CHECK: v_dot2_f32_bf16 v2, 1.0, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf2,0x02,0x0a,0x1c]
+
+v_dot2_f32_bf16 v2, -1.0, v1, v2
+// CHECK: v_dot2_f32_bf16 v2, -1.0, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf3,0x02,0x0a,0x1c]
+
+v_dot2_f32_bf16 v2, 2.0, v1, v2
+// CHECK: v_dot2_f32_bf16 v2, 2.0, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf4,0x02,0x0a,0x1c]
+
+v_dot2_f32_bf16 v2, -2.0, v1, v2
+// CHECK: v_dot2_f32_bf16 v2, -2.0, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf5,0x02,0x0a,0x1c]
+
+v_dot2_f32_bf16 v2, 4.0, v1, v2
+// CHECK: v_dot2_f32_bf16 v2, 4.0, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf6,0x02,0x0a,0x1c]
+
+v_dot2_f32_bf16 v2, -4.0, v1, v2
+// CHECK: v_dot2_f32_bf16 v2, -4.0, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf7,0x02,0x0a,0x1c]
+
+v_dot2_f32_bf16 v2, 100.0, v1, v2
+// CHECK: v_dot2_f32_bf16 v2, 0x42c8, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xff,0x02,0x0a,0x1c,0xc8,0x42,0x00,0x00]
+
+v_dot2_f32_bf16 v2, v1, 100.0, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, 0x42c8, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xff,0x09,0x1c,0xc8,0x42,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/bf16_imm.s b/llvm/test/MC/AMDGPU/bf16_imm.s
index 7cf18103adfe5..d79649073aa89 100644
--- a/llvm/test/MC/AMDGPU/bf16_imm.s
+++ b/llvm/test/MC/AMDGPU/bf16_imm.s
@@ -1,54 +1,54 @@
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s | FileCheck %s
-v_dot2_bf16_bf16 v5, v1, v2, 100.0
-// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 0x42c8 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0xc8,0x42,0x00,0x00]
+v_dot2_bf16_bf16 v5.l, v1, v2, 100.0
+// CHECK: v_dot2_bf16_bf16 v5.l, v1, v2, 0x42c8 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0xc8,0x42,0x00,0x00]
-v_dot2_bf16_bf16 v2, v0, 1.0, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, 1.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe5,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, 1.0, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, 1.0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe5,0x09,0x04]
-v_dot2_bf16_bf16 v2, 1.0, v0, v2
-// CHECK: v_dot2_bf16_bf16 v2, 1.0, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xf2,0x00,0x0a,0x04]
+v_dot2_bf16_bf16 v2.l, 1.0, v0, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, 1.0, v0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0xf2,0x00,0x0a,0x04]
-v_dot2_bf16_bf16 v5, v1, v2, 1.0
-// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 1.0 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xca,0x03]
+v_dot2_bf16_bf16 v5.l, v1, v2, 1.0
+// CHECK: v_dot2_bf16_bf16 v5.l, v1, v2, 1.0 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xca,0x03]
-v_dot2_bf16_bf16 v2, v0, -1.0, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, -1.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe7,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, -1.0, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, -1.0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe7,0x09,0x04]
-v_dot2_bf16_bf16 v2, v0, 0.5, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, 0.5, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe1,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, 0.5, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, 0.5, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe1,0x09,0x04]
-v_dot2_bf16_bf16 v2, v0, -0.5, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, -0.5, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe3,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, -0.5, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, -0.5, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe3,0x09,0x04]
-v_dot2_bf16_bf16 v2, v0, 2.0, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, 2.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe9,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, 2.0, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, 2.0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe9,0x09,0x04]
-v_dot2_bf16_bf16 v2, v0, -2.0, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, -2.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xeb,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, -2.0, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, -2.0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xeb,0x09,0x04]
-v_dot2_bf16_bf16 v2, v0, 4.0, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, 4.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xed,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, 4.0, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, 4.0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xed,0x09,0x04]
-v_dot2_bf16_bf16 v2, v0, -4.0, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, -4.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xef,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, -4.0, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, -4.0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xef,0x09,0x04]
// Check 1/(2*pi) rounded value and ideomatic fp32 0.15915494 value
// which cannot be accurately represented in bf16.
-v_dot2_bf16_bf16 v2, v0, 0.158203125, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, 0.158203125, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, 0.15915494, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
-v_dot2_bf16_bf16 v2, v0, 0.15915494, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, 0.15915494, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, 0.15915494, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
-v_dot2_bf16_bf16 v2, v0, 0x3e22, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, 0x3e22, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, 0.15915494, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
-v_dot2_bf16_bf16 v2, v0, v2, 0.15915494
-// CHECK: v_dot2_bf16_bf16 v2, v0, v2, 0.15915494 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0x05,0xe2,0x03]
+v_dot2_bf16_bf16 v2.l, v0, v2, 0.15915494
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, v2, 0.15915494 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0x05,0xe2,0x03]
v_dot2_f32_bf16 v2, v1, 0, v2
// CHECK: v_dot2_f32_bf16 v2, v1, 0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0x01,0x09,0x1c]
diff --git a/llvm/test/MC/AMDGPU/gfx11-promotions-fake16.s b/llvm/test/MC/AMDGPU/gfx11-promotions-fake16.s
new file mode 100644
index 0000000000000..95a52ffe103fa
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11-promotions-fake16.s
@@ -0,0 +1,353 @@
+// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 %s | FileCheck --check-prefix=GFX11 %s
+
+// Check opcode promotions and forced suffices.
+// 1. When a suffix is optional, check that it may be omitted.
+// 2. When a suffix is optional, check that it may be specified w/o any effect.
+// 3. When a suffix is required, check that specifying it enforces opcode promotion.
+// 4. When a suffix is required, check that omitting the suffix results in a different encoding.
+
+//===----------------------------------------------------------------------===//
+// VOP1.
+//===----------------------------------------------------------------------===//
+
+v_mov_b32 v0, v1
+// GFX11: v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e]
+
+v_mov_b32_e32 v0, v1
+// GFX11: v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e]
+
+//===----------------------------------------------------------------------===//
+// VOP2.
+//===----------------------------------------------------------------------===//
+
+v_add_f16 v5, v1, v2
+// GFX11: v_add_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x64]
+
+v_add_f16_e32 v5, v1, v2
+// GFX11: v_add_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x64]
+
+//===----------------------------------------------------------------------===//
+// VOPC.
+//===----------------------------------------------------------------------===//
+
+v_cmp_lt_f32 vcc_lo, v1, v2
+// GFX11: v_cmp_lt_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x22,0x7c]
+
+v_cmp_lt_f32_e32 vcc_lo, v1, v2
+// GFX11: v_cmp_lt_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x22,0x7c]
+
+//===----------------------------------------------------------------------===//
+// VOPCX.
+//===----------------------------------------------------------------------===//
+
+v_cmpx_class_f16 v1, v2
+// GFX11: v_cmpx_class_f16_e32 v1, v2 ; encoding: [0x01,0x05,0xfa,0x7d]
+
+v_cmpx_class_f16_e32 v1, v2
+// GFX11: v_cmpx_class_f16_e32 v1, v2 ; encoding: [0x01,0x05,0xfa,0x7d]
+
+//===----------------------------------------------------------------------===//
+// VOP1.DPP8.
+//===----------------------------------------------------------------------===//
+
+v_bfrev_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05]
+
+v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05]
+
+//===----------------------------------------------------------------------===//
+// VOP1.DPP16.
+//===----------------------------------------------------------------------===//
+
+v_bfrev_b32 v5, v1 quad_perm:[3,2,1,0]
+// GFX11: v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+
+v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0]
+// GFX11: v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+
+//===----------------------------------------------------------------------===//
+// VOP2.DPP8.
+//===----------------------------------------------------------------------===//
+
+v_add_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05]
+
+v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05]
+
+//===----------------------------------------------------------------------===//
+// VOP2.DPP16.
+//===----------------------------------------------------------------------===//
+
+v_add_f16 v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff]
+
+v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff]
+
+//===----------------------------------------------------------------------===//
+// VOPC.DPP8.
+//===----------------------------------------------------------------------===//
+
+v_cmp_le_u16 v1, v2 dpp8:[7,7,7,3,4,4,6,7] fi:1
+// GFX11: v_cmp_le_u16 vcc_lo, v1, v2 dpp8:[7,7,7,3,4,4,6,7] fi:1 ; encoding: [0xea,0x04,0x76,0x7c,0x01,0xff,0x47,0xfa]
+
+v_cmp_le_u16_dpp v1, v2 dpp8:[7,7,7,3,4,4,6,7] fi:1
+// GFX11: v_cmp_le_u16 vcc_lo, v1, v2 dpp8:[7,7,7,3,4,4,6,7] fi:1 ; encoding: [0xea,0x04,0x76,0x7c,0x01,0xff,0x47,0xfa]
+
+//===----------------------------------------------------------------------===//
+// VOPC.DPP16.
+//===----------------------------------------------------------------------===//
+
+v_cmp_gt_u16 v1, v2 row_shl:0x7 row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: v_cmp_gt_u16 vcc_lo, v1, v2 row_shl:7 row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x78,0x7c,0x01,0x07,0x05,0x00]
+
+v_cmp_gt_u16_dpp v1, v2 row_shl:0x7 row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: v_cmp_gt_u16 vcc_lo, v1, v2 row_shl:7 row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x78,0x7c,0x01,0x07,0x05,0x00]
+
+//===----------------------------------------------------------------------===//
+// VOPCX.DPP8.
+//===----------------------------------------------------------------------===//
+
+v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f16_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05]
+
+//===----------------------------------------------------------------------===//
+// VOPCX.DPP16.
+//===----------------------------------------------------------------------===//
+
+v_cmpx_class_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_class_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_class_f16_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_class_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff]
+
+//===----------------------------------------------------------------------===//
+// VOP1 -> VOP3.
+//===----------------------------------------------------------------------===//
+
+v_sin_f32 v5, 0.5 mul:2
+// GFX11: v_sin_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xb5,0xd5,0xf0,0x00,0x00,0x08]
+
+v_sin_f32_e64 v5, 0.5 mul:2
+// GFX11: v_sin_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xb5,0xd5,0xf0,0x00,0x00,0x08]
+
+v_sin_f32_e64 v5, v1
+// GFX11: v_sin_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xb5,0xd5,0x01,0x01,0x00,0x00]
+
+v_sin_f32 v5, v1
+// GFX11: v_sin_f32_e32 v5, v1 ; encoding: [0x01,0x6b,0x0a,0x7e]
+
+//===----------------------------------------------------------------------===//
+// VOP2 -> VOP3.
+//===----------------------------------------------------------------------===//
+
+v_add_f32 v5, v1, -v2
+// GFX11: v_add_f32_e64 v5, v1, -v2 ; encoding: [0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x40]
+
+v_add_f32_e64 v5, v1, -v2
+// GFX11: v_add_f32_e64 v5, v1, -v2 ; encoding: [0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x40]
+
+v_add_f32_e64 v5, v1, v2
+// GFX11: v_add_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x00]
+
+v_add_f32 v5, v1, v2
+// GFX11: v_add_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x06]
+
+//===----------------------------------------------------------------------===//
+// VOPC -> VOP3.
+//===----------------------------------------------------------------------===//
+
+v_cmp_f_f32 s10, -v1, v2
+// GFX11: v_cmp_f_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x20]
+
+v_cmp_f_f32_e64 s10, -v1, v2
+// GFX11: v_cmp_f_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x20]
+
+v_cmp_f_f32_e64 vcc_lo, v1, v2
+// GFX11: v_cmp_f_f32_e64 vcc_lo, v1, v2 ; encoding: [0x6a,0x00,0x10,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmp_f_f32 vcc_lo, v1, v2
+// GFX11: v_cmp_f_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x20,0x7c]
+
+//===----------------------------------------------------------------------===//
+// VOPCX -> VOP3.
+//===----------------------------------------------------------------------===//
+
+v_cmpx_f_f32 -v1, v2
+// GFX11: v_cmpx_f_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0x05,0x02,0x20]
+
+v_cmpx_f_f32_e64 -v1, v2
+// GFX11: v_cmpx_f_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0x05,0x02,0x20]
+
+v_cmpx_f_f32_e64 v1, v2
+// GFX11: v_cmpx_f_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_f_f32 v1, v2
+// GFX11: v_cmpx_f_f32_e...
[truncated]
|
@llvm/pr-subscribers-mc Author: Brox Chen (broxigarchen) ChangesThis is another NFC patch. Update mc test for a few true16 instructions by duplicating the file to fake16 versions and udpate Patch is 46.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/135816.diff 8 Files Affected:
diff --git a/llvm/test/MC/AMDGPU/bf16_imm-fake16.s b/llvm/test/MC/AMDGPU/bf16_imm-fake16.s
new file mode 100644
index 0000000000000..ee697bee6ab2d
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/bf16_imm-fake16.s
@@ -0,0 +1,114 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s | FileCheck %s
+
+v_dot2_bf16_bf16 v5, v1, v2, 100.0
+// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 0x42c8 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0xc8,0x42,0x00,0x00]
+
+v_dot2_bf16_bf16 v2, v0, 1.0, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, 1.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe5,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, 1.0, v0, v2
+// CHECK: v_dot2_bf16_bf16 v2, 1.0, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xf2,0x00,0x0a,0x04]
+
+v_dot2_bf16_bf16 v5, v1, v2, 1.0
+// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 1.0 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xca,0x03]
+
+v_dot2_bf16_bf16 v2, v0, -1.0, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, -1.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe7,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, v0, 0.5, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, 0.5, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe1,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, v0, -0.5, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, -0.5, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe3,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, v0, 2.0, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, 2.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe9,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, v0, -2.0, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, -2.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xeb,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, v0, 4.0, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, 4.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xed,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, v0, -4.0, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, -4.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xef,0x09,0x04]
+
+// Check 1/(2*pi) rounded value and ideomatic fp32 0.15915494 value
+// which cannot be accurately represented in bf16.
+
+v_dot2_bf16_bf16 v2, v0, 0.158203125, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, v0, 0.15915494, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, v0, 0x3e22, v2
+// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
+
+v_dot2_bf16_bf16 v2, v0, v2, 0.15915494
+// CHECK: v_dot2_bf16_bf16 v2, v0, v2, 0.15915494 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0x05,0xe2,0x03]
+
+v_dot2_f32_bf16 v2, v1, 0, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, 0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0x01,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, 0.5, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, 0.5, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe1,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, -0.5, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, -0.5, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe3,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, 1.0, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, 1.0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe5,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, -1.0, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, -1.0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe7,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, 2.0, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, 2.0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe9,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, -2.0, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, -2.0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xeb,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, 4.0, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, 4.0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xed,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, -4.0, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, -4.0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xef,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, 0.15915494, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, 0.15915494, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xf1,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, v1, 0x3e22, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, 0.15915494, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xf1,0x09,0x1c]
+
+v_dot2_f32_bf16 v2, 0.5, v1, v2
+// CHECK: v_dot2_f32_bf16 v2, 0.5, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf0,0x02,0x0a,0x1c]
+
+v_dot2_f32_bf16 v2, -0.5, v1, v2
+// CHECK: v_dot2_f32_bf16 v2, -0.5, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf1,0x02,0x0a,0x1c]
+
+v_dot2_f32_bf16 v2, 1.0, v1, v2
+// CHECK: v_dot2_f32_bf16 v2, 1.0, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf2,0x02,0x0a,0x1c]
+
+v_dot2_f32_bf16 v2, -1.0, v1, v2
+// CHECK: v_dot2_f32_bf16 v2, -1.0, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf3,0x02,0x0a,0x1c]
+
+v_dot2_f32_bf16 v2, 2.0, v1, v2
+// CHECK: v_dot2_f32_bf16 v2, 2.0, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf4,0x02,0x0a,0x1c]
+
+v_dot2_f32_bf16 v2, -2.0, v1, v2
+// CHECK: v_dot2_f32_bf16 v2, -2.0, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf5,0x02,0x0a,0x1c]
+
+v_dot2_f32_bf16 v2, 4.0, v1, v2
+// CHECK: v_dot2_f32_bf16 v2, 4.0, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf6,0x02,0x0a,0x1c]
+
+v_dot2_f32_bf16 v2, -4.0, v1, v2
+// CHECK: v_dot2_f32_bf16 v2, -4.0, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf7,0x02,0x0a,0x1c]
+
+v_dot2_f32_bf16 v2, 100.0, v1, v2
+// CHECK: v_dot2_f32_bf16 v2, 0x42c8, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xff,0x02,0x0a,0x1c,0xc8,0x42,0x00,0x00]
+
+v_dot2_f32_bf16 v2, v1, 100.0, v2
+// CHECK: v_dot2_f32_bf16 v2, v1, 0x42c8, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xff,0x09,0x1c,0xc8,0x42,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/bf16_imm.s b/llvm/test/MC/AMDGPU/bf16_imm.s
index 7cf18103adfe5..d79649073aa89 100644
--- a/llvm/test/MC/AMDGPU/bf16_imm.s
+++ b/llvm/test/MC/AMDGPU/bf16_imm.s
@@ -1,54 +1,54 @@
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s | FileCheck %s
-v_dot2_bf16_bf16 v5, v1, v2, 100.0
-// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 0x42c8 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0xc8,0x42,0x00,0x00]
+v_dot2_bf16_bf16 v5.l, v1, v2, 100.0
+// CHECK: v_dot2_bf16_bf16 v5.l, v1, v2, 0x42c8 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0xc8,0x42,0x00,0x00]
-v_dot2_bf16_bf16 v2, v0, 1.0, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, 1.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe5,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, 1.0, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, 1.0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe5,0x09,0x04]
-v_dot2_bf16_bf16 v2, 1.0, v0, v2
-// CHECK: v_dot2_bf16_bf16 v2, 1.0, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xf2,0x00,0x0a,0x04]
+v_dot2_bf16_bf16 v2.l, 1.0, v0, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, 1.0, v0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0xf2,0x00,0x0a,0x04]
-v_dot2_bf16_bf16 v5, v1, v2, 1.0
-// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 1.0 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xca,0x03]
+v_dot2_bf16_bf16 v5.l, v1, v2, 1.0
+// CHECK: v_dot2_bf16_bf16 v5.l, v1, v2, 1.0 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xca,0x03]
-v_dot2_bf16_bf16 v2, v0, -1.0, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, -1.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe7,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, -1.0, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, -1.0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe7,0x09,0x04]
-v_dot2_bf16_bf16 v2, v0, 0.5, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, 0.5, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe1,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, 0.5, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, 0.5, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe1,0x09,0x04]
-v_dot2_bf16_bf16 v2, v0, -0.5, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, -0.5, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe3,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, -0.5, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, -0.5, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe3,0x09,0x04]
-v_dot2_bf16_bf16 v2, v0, 2.0, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, 2.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe9,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, 2.0, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, 2.0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe9,0x09,0x04]
-v_dot2_bf16_bf16 v2, v0, -2.0, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, -2.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xeb,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, -2.0, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, -2.0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xeb,0x09,0x04]
-v_dot2_bf16_bf16 v2, v0, 4.0, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, 4.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xed,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, 4.0, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, 4.0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xed,0x09,0x04]
-v_dot2_bf16_bf16 v2, v0, -4.0, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, -4.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xef,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, -4.0, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, -4.0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xef,0x09,0x04]
// Check 1/(2*pi) rounded value and ideomatic fp32 0.15915494 value
// which cannot be accurately represented in bf16.
-v_dot2_bf16_bf16 v2, v0, 0.158203125, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, 0.158203125, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, 0.15915494, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
-v_dot2_bf16_bf16 v2, v0, 0.15915494, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, 0.15915494, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, 0.15915494, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
-v_dot2_bf16_bf16 v2, v0, 0x3e22, v2
-// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
+v_dot2_bf16_bf16 v2.l, v0, 0x3e22, v2.l
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, 0.15915494, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
-v_dot2_bf16_bf16 v2, v0, v2, 0.15915494
-// CHECK: v_dot2_bf16_bf16 v2, v0, v2, 0.15915494 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0x05,0xe2,0x03]
+v_dot2_bf16_bf16 v2.l, v0, v2, 0.15915494
+// CHECK: v_dot2_bf16_bf16 v2.l, v0, v2, 0.15915494 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0x05,0xe2,0x03]
v_dot2_f32_bf16 v2, v1, 0, v2
// CHECK: v_dot2_f32_bf16 v2, v1, 0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0x01,0x09,0x1c]
diff --git a/llvm/test/MC/AMDGPU/gfx11-promotions-fake16.s b/llvm/test/MC/AMDGPU/gfx11-promotions-fake16.s
new file mode 100644
index 0000000000000..95a52ffe103fa
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11-promotions-fake16.s
@@ -0,0 +1,353 @@
+// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 %s | FileCheck --check-prefix=GFX11 %s
+
+// Check opcode promotions and forced suffices.
+// 1. When a suffix is optional, check that it may be omitted.
+// 2. When a suffix is optional, check that it may be specified w/o any effect.
+// 3. When a suffix is required, check that specifying it enforces opcode promotion.
+// 4. When a suffix is required, check that omitting the suffix results in a different encoding.
+
+//===----------------------------------------------------------------------===//
+// VOP1.
+//===----------------------------------------------------------------------===//
+
+v_mov_b32 v0, v1
+// GFX11: v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e]
+
+v_mov_b32_e32 v0, v1
+// GFX11: v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e]
+
+//===----------------------------------------------------------------------===//
+// VOP2.
+//===----------------------------------------------------------------------===//
+
+v_add_f16 v5, v1, v2
+// GFX11: v_add_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x64]
+
+v_add_f16_e32 v5, v1, v2
+// GFX11: v_add_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x64]
+
+//===----------------------------------------------------------------------===//
+// VOPC.
+//===----------------------------------------------------------------------===//
+
+v_cmp_lt_f32 vcc_lo, v1, v2
+// GFX11: v_cmp_lt_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x22,0x7c]
+
+v_cmp_lt_f32_e32 vcc_lo, v1, v2
+// GFX11: v_cmp_lt_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x22,0x7c]
+
+//===----------------------------------------------------------------------===//
+// VOPCX.
+//===----------------------------------------------------------------------===//
+
+v_cmpx_class_f16 v1, v2
+// GFX11: v_cmpx_class_f16_e32 v1, v2 ; encoding: [0x01,0x05,0xfa,0x7d]
+
+v_cmpx_class_f16_e32 v1, v2
+// GFX11: v_cmpx_class_f16_e32 v1, v2 ; encoding: [0x01,0x05,0xfa,0x7d]
+
+//===----------------------------------------------------------------------===//
+// VOP1.DPP8.
+//===----------------------------------------------------------------------===//
+
+v_bfrev_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05]
+
+v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05]
+
+//===----------------------------------------------------------------------===//
+// VOP1.DPP16.
+//===----------------------------------------------------------------------===//
+
+v_bfrev_b32 v5, v1 quad_perm:[3,2,1,0]
+// GFX11: v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+
+v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0]
+// GFX11: v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+
+//===----------------------------------------------------------------------===//
+// VOP2.DPP8.
+//===----------------------------------------------------------------------===//
+
+v_add_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05]
+
+v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05]
+
+//===----------------------------------------------------------------------===//
+// VOP2.DPP16.
+//===----------------------------------------------------------------------===//
+
+v_add_f16 v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff]
+
+v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff]
+
+//===----------------------------------------------------------------------===//
+// VOPC.DPP8.
+//===----------------------------------------------------------------------===//
+
+v_cmp_le_u16 v1, v2 dpp8:[7,7,7,3,4,4,6,7] fi:1
+// GFX11: v_cmp_le_u16 vcc_lo, v1, v2 dpp8:[7,7,7,3,4,4,6,7] fi:1 ; encoding: [0xea,0x04,0x76,0x7c,0x01,0xff,0x47,0xfa]
+
+v_cmp_le_u16_dpp v1, v2 dpp8:[7,7,7,3,4,4,6,7] fi:1
+// GFX11: v_cmp_le_u16 vcc_lo, v1, v2 dpp8:[7,7,7,3,4,4,6,7] fi:1 ; encoding: [0xea,0x04,0x76,0x7c,0x01,0xff,0x47,0xfa]
+
+//===----------------------------------------------------------------------===//
+// VOPC.DPP16.
+//===----------------------------------------------------------------------===//
+
+v_cmp_gt_u16 v1, v2 row_shl:0x7 row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: v_cmp_gt_u16 vcc_lo, v1, v2 row_shl:7 row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x78,0x7c,0x01,0x07,0x05,0x00]
+
+v_cmp_gt_u16_dpp v1, v2 row_shl:0x7 row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: v_cmp_gt_u16 vcc_lo, v1, v2 row_shl:7 row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x78,0x7c,0x01,0x07,0x05,0x00]
+
+//===----------------------------------------------------------------------===//
+// VOPCX.DPP8.
+//===----------------------------------------------------------------------===//
+
+v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f16_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05]
+
+//===----------------------------------------------------------------------===//
+// VOPCX.DPP16.
+//===----------------------------------------------------------------------===//
+
+v_cmpx_class_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_class_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_class_f16_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_class_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff]
+
+//===----------------------------------------------------------------------===//
+// VOP1 -> VOP3.
+//===----------------------------------------------------------------------===//
+
+v_sin_f32 v5, 0.5 mul:2
+// GFX11: v_sin_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xb5,0xd5,0xf0,0x00,0x00,0x08]
+
+v_sin_f32_e64 v5, 0.5 mul:2
+// GFX11: v_sin_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xb5,0xd5,0xf0,0x00,0x00,0x08]
+
+v_sin_f32_e64 v5, v1
+// GFX11: v_sin_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xb5,0xd5,0x01,0x01,0x00,0x00]
+
+v_sin_f32 v5, v1
+// GFX11: v_sin_f32_e32 v5, v1 ; encoding: [0x01,0x6b,0x0a,0x7e]
+
+//===----------------------------------------------------------------------===//
+// VOP2 -> VOP3.
+//===----------------------------------------------------------------------===//
+
+v_add_f32 v5, v1, -v2
+// GFX11: v_add_f32_e64 v5, v1, -v2 ; encoding: [0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x40]
+
+v_add_f32_e64 v5, v1, -v2
+// GFX11: v_add_f32_e64 v5, v1, -v2 ; encoding: [0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x40]
+
+v_add_f32_e64 v5, v1, v2
+// GFX11: v_add_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x00]
+
+v_add_f32 v5, v1, v2
+// GFX11: v_add_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x06]
+
+//===----------------------------------------------------------------------===//
+// VOPC -> VOP3.
+//===----------------------------------------------------------------------===//
+
+v_cmp_f_f32 s10, -v1, v2
+// GFX11: v_cmp_f_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x20]
+
+v_cmp_f_f32_e64 s10, -v1, v2
+// GFX11: v_cmp_f_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x20]
+
+v_cmp_f_f32_e64 vcc_lo, v1, v2
+// GFX11: v_cmp_f_f32_e64 vcc_lo, v1, v2 ; encoding: [0x6a,0x00,0x10,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmp_f_f32 vcc_lo, v1, v2
+// GFX11: v_cmp_f_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x20,0x7c]
+
+//===----------------------------------------------------------------------===//
+// VOPCX -> VOP3.
+//===----------------------------------------------------------------------===//
+
+v_cmpx_f_f32 -v1, v2
+// GFX11: v_cmpx_f_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0x05,0x02,0x20]
+
+v_cmpx_f_f32_e64 -v1, v2
+// GFX11: v_cmpx_f_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0x05,0x02,0x20]
+
+v_cmpx_f_f32_e64 v1, v2
+// GFX11: v_cmpx_f_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_f_f32 v1, v2
+// GFX11: v_cmpx_f_f32_e...
[truncated]
|
5a21bc5
to
bc2eeac
Compare
bc2eeac
to
25dc466
Compare
@@ -0,0 +1,43 @@ | |||
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 %s 2>&1 | FileCheck %s -check-prefix=GCN-ERR --implicit-check-not=error: --strict-whitespace | |||
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 %s 2>&1 | FileCheck %s -check-prefix=GCN-ERR --implicit-check-not=error: --strict-whitespace | |||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you also need to add -mattr=+real-true16 to llvm/test/MC/AMDGPU/gfx11_asm_vinterp_err.s?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it's in a seperate PR #135823
75d7a52
to
fce2897
Compare
fce2897
to
6d793ef
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This is another NFC patch. Update mc test for a few true16 instructions by duplicating the file to fake16 versions and udpate `mattr` flag with +/-real-true16. Also added some fake16 file that are not properly created before
This is another NFC patch.
Update mc test for a few true16 instructions by duplicating the file to fake16 versions and udpate
mattr
flag with +/-real-true16. Also added some fake16 file that are not properly created before