-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AMDGPU][True16] Support V_FLOOR_F16. #78446
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-mc Author: Ivan Kosarev (kosarev) ChangesPatch is 35.21 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/78446.diff 16 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index aa98a4b860dda9..58fdb9b724931a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5280,7 +5280,8 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
: AMDGPU::V_CEIL_F16_fake16_e64;
case AMDGPU::S_FLOOR_F16:
- return AMDGPU::V_FLOOR_F16_fake16_e64;
+ return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
+ : AMDGPU::V_FLOOR_F16_fake16_e64;
case AMDGPU::S_TRUNC_F16:
return AMDGPU::V_TRUNC_F16_fake16_e64;
case AMDGPU::S_RNDNE_F16:
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index d604990dc88c20..b0dd92af4a027a 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -881,6 +881,7 @@ defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16"
defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
+defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
defm V_CEIL_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir
index 52c37ec6246c96..30975a8937db62 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir
@@ -1,5 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GCN,GFX11 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GCN,GFX11-FAKE16 %s
---
name: ffloor_s16_ss
@@ -19,6 +21,15 @@ body: |
; VI-NEXT: [[FFLOOR:%[0-9]+]]:sreg_32(s16) = G_FFLOOR [[TRUNC]]
; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FFLOOR]](s16)
; VI-NEXT: $sgpr0 = COPY [[COPY1]](s32)
+ ;
+ ; GCN-LABEL: name: ffloor_s16_ss
+ ; GCN: liveins: $sgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GCN-NEXT: [[FFLOOR:%[0-9]+]]:sreg_32(s16) = G_FFLOOR [[TRUNC]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FFLOOR]](s16)
+ ; GCN-NEXT: $sgpr0 = COPY [[COPY1]](s32)
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:sgpr(s16) = G_FFLOOR %1
@@ -40,8 +51,24 @@ body: |
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; VI-NEXT: %2:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: $vgpr0 = COPY %2
+ ; VI-NEXT: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_e64_]]
+ ;
+ ; GFX11-LABEL: name: ffloor_s16_vv
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
+ ; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
+ ; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: ffloor_s16_vv
+ ; GFX11-FAKE16: liveins: $vgpr0
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[V_FLOOR_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FFLOOR %1
@@ -63,8 +90,23 @@ body: |
; VI: liveins: $sgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; VI-NEXT: %2:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: $vgpr0 = COPY %2
+ ; VI-NEXT: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_e64_]]
+ ;
+ ; GFX11-LABEL: name: ffloor_s16_vs
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
+ ; GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: ffloor_s16_vs
+ ; GFX11-FAKE16: liveins: $sgpr0
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-FAKE16-NEXT: [[V_FLOOR_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_fake16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FFLOOR %1
@@ -86,8 +128,24 @@ body: |
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; VI-NEXT: %3:vgpr_32 = nofpexcept V_FLOOR_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: $vgpr0 = COPY %3
+ ; VI-NEXT: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_e64_]]
+ ;
+ ; GFX11-LABEL: name: ffloor_fneg_s16_vv
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
+ ; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
+ ; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: ffloor_fneg_s16_vv
+ ; GFX11-FAKE16: liveins: $vgpr0
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[V_FLOOR_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_fake16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FNEG %1
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir
index d4eab5b797e66c..7767aa54c81519 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir
@@ -78,3 +78,24 @@ body: |
%2:sreg_32 = COPY %1:vgpr_32
%3:sreg_32 = nofpexcept S_CEIL_F16 killed %2:sreg_32, implicit $mode
...
+
+---
+name: floor_f16
+body: |
+ bb.0:
+ ; REAL16-LABEL: name: floor_f16
+ ; REAL16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; REAL16-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
+ ; REAL16-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; REAL16-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, implicit $mode, implicit $exec
+ ;
+ ; FAKE16-LABEL: name: floor_f16
+ ; FAKE16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; FAKE16-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
+ ; FAKE16-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; FAKE16-NEXT: [[V_FLOOR_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_fake16_e64 0, [[V_CVT_F32_U32_e64_]], 0, 0, implicit $mode, implicit $exec
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %2:sreg_32 = COPY %1:vgpr_32
+ %3:sreg_32 = nofpexcept S_FLOOR_F16 killed %2:sreg_32, implicit $mode
+...
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll
index 00bb32c768dca3..e8d037c5ff53e0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s
declare half @llvm.floor.f16(half %a)
declare <2 x half> @llvm.floor.v2f16(<2 x half> %a)
@@ -59,11 +60,31 @@ define amdgpu_kernel void @floor_f16(
; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0
; GFX11-NEXT: s_mov_b32 s5, s1
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_floor_f16_e32 v0, v0
+; GFX11-NEXT: v_floor_f16_e32 v0.l, v0.l
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: floor_f16:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s3
+; GFX11-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_floor_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT: s_nop 0
+; GFX11-FAKE16-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-FAKE16-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a) {
entry:
@@ -143,14 +164,43 @@ define amdgpu_kernel void @floor_v2f16(
; GFX11-NEXT: s_mov_b32 s5, s1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-NEXT: v_floor_f16_e32 v0, v0
+; GFX11-NEXT: v_floor_f16_e32 v0.l, v0.l
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-NEXT: v_mov_b16_e32 v1.l, v0.l
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_floor_f16_e32 v1, v1
-; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-NEXT: v_floor_f16_e32 v0.h, v0.h
+; GFX11-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: floor_v2f16:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s3
+; GFX11-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX11-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-FAKE16-NEXT: v_floor_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_floor_f16_e32 v1, v1
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT: s_nop 0
+; GFX11-FAKE16-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-FAKE16-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a) {
entry:
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1-fake16.s
index 668085cffbf004..a155b74046babc 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1-fake16.s
@@ -1,6 +1,54 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s
+v_floor_f16 v5, v1
+// GFX11: encoding: [0x01,0xb7,0x0a,0x7e]
+
+v_floor_f16 v5, v127
+// GFX11: encoding: [0x7f,0xb7,0x0a,0x7e]
+
+v_floor_f16 v5, s1
+// GFX11: encoding: [0x01,0xb6,0x0a,0x7e]
+
+v_floor_f16 v5, s105
+// GFX11: encoding: [0x69,0xb6,0x0a,0x7e]
+
+v_floor_f16 v5, vcc_lo
+// GFX11: encoding: [0x6a,0xb6,0x0a,0x7e]
+
+v_floor_f16 v5, vcc_hi
+// GFX11: encoding: [0x6b,0xb6,0x0a,0x7e]
+
+v_floor_f16 v5, ttmp15
+// GFX11: encoding: [0x7b,0xb6,0x0a,0x7e]
+
+v_floor_f16 v5, m0
+// GFX11: encoding: [0x7d,0xb6,0x0a,0x7e]
+
+v_floor_f16 v5, exec_lo
+// GFX11: encoding: [0x7e,0xb6,0x0a,0x7e]
+
+v_floor_f16 v5, exec_hi
+// GFX11: encoding: [0x7f,0xb6,0x0a,0x7e]
+
+v_floor_f16 v5, null
+// GFX11: encoding: [0x7c,0xb6,0x0a,0x7e]
+
+v_floor_f16 v5, -1
+// GFX11: encoding: [0xc1,0xb6,0x0a,0x7e]
+
+v_floor_f16 v5, 0.5
+// GFX11: encoding: [0xf0,0xb6,0x0a,0x7e]
+
+v_floor_f16 v5, src_scc
+// GFX11: encoding: [0xfd,0xb6,0x0a,0x7e]
+
+v_floor_f16 v127, 0xfe0b
+// GFX11: encoding: [0xff,0xb6,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
+
+v_floor_f32 v5, v1
+// GFX11: encoding: [0x01,0x49,0x0a,0x7e]
+
v_ceil_f16 v5, v1
// GFX11: encoding: [0x01,0xb9,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
index 6b19a5c94a64e4..86c2375c89496a 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
@@ -1906,50 +1906,56 @@ v_ffbl_b32 v5, src_scc
v_ffbl_b32 v255, 0xaf123456
// GFX11: encoding: [0xff,0x74,0xfe,0x7f,0x56,0x34,0x12,0xaf]
-v_floor_f16 v5, v1
+v_floor_f16 v5.l, v1.l
// GFX11: encoding: [0x01,0xb7,0x0a,0x7e]
-v_floor_f16 v5, v127
+v_floor_f16 v5.l, v127.l
// GFX11: encoding: [0x7f,0xb7,0x0a,0x7e]
-v_floor_f16 v5, s1
+v_floor_f16 v5.l, v1.h
+// GFX11: encoding: [0x81,0xb7,0x0a,0x7e]
+
+v_floor_f16 v5.l, v127.h
+// GFX11: encoding: [0xff,0xb7,0x0a,0x7e]
+
+v_floor_f16 v5.l, s1
// GFX11: encoding: [0x01,0xb6,0x0a,0x7e]
-v_floor_f16 v5, s105
+v_floor_f16 v5.l, s105
// GFX11: encoding: [0x69,0xb6,0x0a,0x7e]
-v_floor_f16 v5, vcc_lo
+v_floor_f16 v5.l, vcc_lo
// GFX11: encoding: [0x6a,0xb6,0x0a,0x7e]
-v_floor_f16 v5, vcc_hi
+v_floor_f16 v5.l, vcc_hi
// GFX11: encoding: [0x6b,0xb6,0x0a,0x7e]
-v_floor_f16 v5, ttmp15
+v_floor_f16 v5.l, ttmp15
// GFX11: encoding: [0x7b,0xb6,0x0a,0x7e]
-v_floor_f16 v5, m0
+v_floor_f16 v5.l, m0
// GFX11: encoding: [0x7d,0xb6,0x0a,0x7e]
-v_floor_f16 v5, exec_lo
+v_floor_f16 v5.l, exec_lo
// GFX11: encoding: [0x7e,0xb6,0x0a,0x7e]
-v_floor_f16 v5, exec_hi
+v_floor_f16 v5.l, exec_hi
// GFX11: encoding: [0x7f,0xb6,0x0a,0x7e]
-v_floor_f16 v5, null
+v_floor_f16 v5.l, null
// GFX11: encoding: [0x7c,0xb6,0x0a,0x7e]
-v_floor_f16 v5, -1
+v_floor_f16 v5.l, -1
// GFX11: encoding: [0xc1,0xb6,0x0a,0x7e]
-v_floor_f16 v5, 0.5
-// GFX11: encoding: [0xf0,0xb6,0x0a,0x7e]
+v_floor_f16 v127.l, 0.5
+// GFX11: encoding: [0xf0,0xb6,0xfe,0x7e]
-v_floor_f16 v5, src_scc
-// GFX11: encoding: [0xfd,0xb6,0x0a,0x7e]
+v_floor_f16 v5.h, src_scc
+// GFX11: encoding: [0xfd,0xb6,0x0a,0x7f]
-v_floor_f16 v127, 0xfe0b
-// GFX11: encoding: [0xff,0xb6,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
+v_floor_f16 v127.h, 0xfe0b
+// GFX11: encoding: [0xff,0xb6,0xfe,0x7f,0x0b,0xfe,0x00,0x00]
v_floor_f32 v5, v1
// GFX11: encoding: [0x01,0x49,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16-fake16.s
index e3679b9321f439..038a9d4c9e1895 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16-fake16.s
@@ -1,6 +1,48 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+v_floor_f16 v5, v1 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+
+v_floor_f16 v5, v1 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+
+v_floor_f16 v5, v1 row_mirror
+// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x40,0x01,0xff]
+
+v_floor_f16 v5, v1 row_half_mirror
+// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x41,0x01,0xff]
+
+v_floor_f16 v5, v1 row_shl:1
+// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x01,0x01,0xff]
+
+v_floor_f16 v5, v1 row_shl:15
+// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+
+v_floor_f16 v5, v1 row_shr:1
+// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x11,0x01,0xff]
+
+v_floor_f16 v5, v1 row_shr:15
+// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+
+v_floor_f16 v5, v1 row_ror:1
+// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x21,0x01,0xff]
+
+v_floor_f16 v5, v1 row_ror:15
+// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+
+v_floor_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x50,0x01,0xff]
+
+v_floor_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+
+v_floor_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x60,0x09,0x13]
+
+v_floor_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xb6,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+
v_ceil_f16 v5, v1 quad_perm:[3,2,1,0]
// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s
index cd9aa9273f1d86..fa6df6affeb1e7 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s
@@ -1513,47 +1513,47 @@ v_ffbl_b32 v5, v1 row_xmask...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
No description provided.