Skip to content

[AMDGPU][True16] Support V_FLOOR_F16. #78446

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits on Jan 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5280,7 +5280,8 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
: AMDGPU::V_CEIL_F16_fake16_e64;
case AMDGPU::S_FLOOR_F16:
return AMDGPU::V_FLOOR_F16_fake16_e64;
return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
: AMDGPU::V_FLOOR_F16_fake16_e64;
case AMDGPU::S_TRUNC_F16:
return AMDGPU::V_TRUNC_F16_fake16_e64;
case AMDGPU::S_RNDNE_F16:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -881,6 +881,7 @@ defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16"
defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
defm V_CEIL_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
Expand Down
70 changes: 64 additions & 6 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=VI %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GCN,GFX11 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GCN,GFX11-FAKE16 %s

---
name: ffloor_s16_ss
Expand All @@ -19,6 +21,15 @@ body: |
; VI-NEXT: [[FFLOOR:%[0-9]+]]:sreg_32(s16) = G_FFLOOR [[TRUNC]]
; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FFLOOR]](s16)
; VI-NEXT: $sgpr0 = COPY [[COPY1]](s32)
;
; GCN-LABEL: name: ffloor_s16_ss
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
; GCN-NEXT: [[FFLOOR:%[0-9]+]]:sreg_32(s16) = G_FFLOOR [[TRUNC]]
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FFLOOR]](s16)
; GCN-NEXT: $sgpr0 = COPY [[COPY1]](s32)
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:sgpr(s16) = G_FFLOOR %1
Expand All @@ -40,8 +51,24 @@ body: |
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; VI-NEXT: %2:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; VI-NEXT: $vgpr0 = COPY %2
; VI-NEXT: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; VI-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_e64_]]
;
; GFX11-LABEL: name: ffloor_s16_vv
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
;
; GFX11-FAKE16-LABEL: name: ffloor_s16_vv
; GFX11-FAKE16: liveins: $vgpr0
; GFX11-FAKE16-NEXT: {{ $}}
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-FAKE16-NEXT: [[V_FLOOR_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FFLOOR %1
Expand All @@ -63,8 +90,23 @@ body: |
; VI: liveins: $sgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; VI-NEXT: %2:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; VI-NEXT: $vgpr0 = COPY %2
; VI-NEXT: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; VI-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_e64_]]
;
; GFX11-LABEL: name: ffloor_s16_vs
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
;
; GFX11-FAKE16-LABEL: name: ffloor_s16_vs
; GFX11-FAKE16: liveins: $sgpr0
; GFX11-FAKE16-NEXT: {{ $}}
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-FAKE16-NEXT: [[V_FLOOR_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_fake16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FFLOOR %1
Expand All @@ -86,8 +128,24 @@ body: |
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; VI-NEXT: %3:vgpr_32 = nofpexcept V_FLOOR_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
; VI-NEXT: $vgpr0 = COPY %3
; VI-NEXT: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
; VI-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_e64_]]
;
; GFX11-LABEL: name: ffloor_fneg_s16_vv
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
;
; GFX11-FAKE16-LABEL: name: ffloor_fneg_s16_vv
; GFX11-FAKE16: liveins: $vgpr0
; GFX11-FAKE16-NEXT: {{ $}}
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-FAKE16-NEXT: [[V_FLOOR_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_fake16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FNEG %1
Expand Down
21 changes: 21 additions & 0 deletions llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,24 @@ body: |
%2:sreg_32 = COPY %1:vgpr_32
%3:sreg_32 = nofpexcept S_CEIL_F16 killed %2:sreg_32, implicit $mode
...

---
name: floor_f16
body: |
bb.0:
; REAL16-LABEL: name: floor_f16
; REAL16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; REAL16-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
; REAL16-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; REAL16-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, implicit $mode, implicit $exec
;
; FAKE16-LABEL: name: floor_f16
; FAKE16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; FAKE16-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
; FAKE16-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; FAKE16-NEXT: [[V_FLOOR_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_fake16_e64 0, [[V_CVT_F32_U32_e64_]], 0, 0, implicit $mode, implicit $exec
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
%2:sreg_32 = COPY %1:vgpr_32
%3:sreg_32 = nofpexcept S_FLOOR_F16 killed %2:sreg_32, implicit $mode
...
60 changes: 55 additions & 5 deletions llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s

declare half @llvm.floor.f16(half %a)
declare <2 x half> @llvm.floor.v2f16(<2 x half> %a)
Expand Down Expand Up @@ -59,11 +60,31 @@ define amdgpu_kernel void @floor_f16(
; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0
; GFX11-NEXT: s_mov_b32 s5, s1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_floor_f16_e32 v0, v0
; GFX11-NEXT: v_floor_f16_e32 v0.l, v0.l
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-FAKE16-LABEL: floor_f16:
; GFX11-FAKE16: ; %bb.0: ; %entry
; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1
; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
; GFX11-FAKE16-NEXT: s_mov_b32 s10, s6
; GFX11-FAKE16-NEXT: s_mov_b32 s11, s7
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_mov_b32 s8, s2
; GFX11-FAKE16-NEXT: s_mov_b32 s9, s3
; GFX11-FAKE16-NEXT: s_mov_b32 s4, s0
; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[8:11], 0
; GFX11-FAKE16-NEXT: s_mov_b32 s5, s1
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-FAKE16-NEXT: v_floor_f16_e32 v0, v0
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
; GFX11-FAKE16-NEXT: s_nop 0
; GFX11-FAKE16-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FAKE16-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a) {
entry:
Expand Down Expand Up @@ -143,14 +164,43 @@ define amdgpu_kernel void @floor_v2f16(
; GFX11-NEXT: s_mov_b32 s5, s1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_floor_f16_e32 v0, v0
; GFX11-NEXT: v_floor_f16_e32 v0.l, v0.l
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX11-NEXT: v_mov_b16_e32 v1.l, v0.l
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_floor_f16_e32 v1, v1
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT: v_floor_f16_e32 v0.h, v0.h
; GFX11-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-FAKE16-LABEL: floor_v2f16:
; GFX11-FAKE16: ; %bb.0: ; %entry
; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1
; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
; GFX11-FAKE16-NEXT: s_mov_b32 s10, s6
; GFX11-FAKE16-NEXT: s_mov_b32 s11, s7
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_mov_b32 s8, s2
; GFX11-FAKE16-NEXT: s_mov_b32 s9, s3
; GFX11-FAKE16-NEXT: s_mov_b32 s4, s0
; GFX11-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
; GFX11-FAKE16-NEXT: s_mov_b32 s5, s1
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-FAKE16-NEXT: v_floor_f16_e32 v0, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_floor_f16_e32 v1, v1
; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
; GFX11-FAKE16-NEXT: s_nop 0
; GFX11-FAKE16-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FAKE16-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a) {
entry:
Expand Down
48 changes: 48 additions & 0 deletions llvm/test/MC/AMDGPU/gfx11_asm_vop1-fake16.s
Original file line number Diff line number Diff line change
@@ -1,6 +1,54 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s

v_floor_f16 v5, v1
// GFX11: encoding: [0x01,0xb7,0x0a,0x7e]

v_floor_f16 v5, v127
// GFX11: encoding: [0x7f,0xb7,0x0a,0x7e]

v_floor_f16 v5, s1
// GFX11: encoding: [0x01,0xb6,0x0a,0x7e]

v_floor_f16 v5, s105
// GFX11: encoding: [0x69,0xb6,0x0a,0x7e]

v_floor_f16 v5, vcc_lo
// GFX11: encoding: [0x6a,0xb6,0x0a,0x7e]

v_floor_f16 v5, vcc_hi
// GFX11: encoding: [0x6b,0xb6,0x0a,0x7e]

v_floor_f16 v5, ttmp15
// GFX11: encoding: [0x7b,0xb6,0x0a,0x7e]

v_floor_f16 v5, m0
// GFX11: encoding: [0x7d,0xb6,0x0a,0x7e]

v_floor_f16 v5, exec_lo
// GFX11: encoding: [0x7e,0xb6,0x0a,0x7e]

v_floor_f16 v5, exec_hi
// GFX11: encoding: [0x7f,0xb6,0x0a,0x7e]

v_floor_f16 v5, null
// GFX11: encoding: [0x7c,0xb6,0x0a,0x7e]

v_floor_f16 v5, -1
// GFX11: encoding: [0xc1,0xb6,0x0a,0x7e]

v_floor_f16 v5, 0.5
// GFX11: encoding: [0xf0,0xb6,0x0a,0x7e]

v_floor_f16 v5, src_scc
// GFX11: encoding: [0xfd,0xb6,0x0a,0x7e]

v_floor_f16 v127, 0xfe0b
// GFX11: encoding: [0xff,0xb6,0xfe,0x7e,0x0b,0xfe,0x00,0x00]

v_floor_f32 v5, v1
// GFX11: encoding: [0x01,0x49,0x0a,0x7e]

v_ceil_f16 v5, v1
// GFX11: encoding: [0x01,0xb9,0x0a,0x7e]

Expand Down
42 changes: 24 additions & 18 deletions llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
Original file line number Diff line number Diff line change
Expand Up @@ -1906,50 +1906,56 @@ v_ffbl_b32 v5, src_scc
v_ffbl_b32 v255, 0xaf123456
// GFX11: encoding: [0xff,0x74,0xfe,0x7f,0x56,0x34,0x12,0xaf]

v_floor_f16 v5, v1
v_floor_f16 v5.l, v1.l
// GFX11: encoding: [0x01,0xb7,0x0a,0x7e]

v_floor_f16 v5, v127
v_floor_f16 v5.l, v127.l
// GFX11: encoding: [0x7f,0xb7,0x0a,0x7e]

v_floor_f16 v5, s1
v_floor_f16 v5.l, v1.h
// GFX11: encoding: [0x81,0xb7,0x0a,0x7e]

v_floor_f16 v5.l, v127.h
// GFX11: encoding: [0xff,0xb7,0x0a,0x7e]

v_floor_f16 v5.l, s1
// GFX11: encoding: [0x01,0xb6,0x0a,0x7e]

v_floor_f16 v5, s105
v_floor_f16 v5.l, s105
// GFX11: encoding: [0x69,0xb6,0x0a,0x7e]

v_floor_f16 v5, vcc_lo
v_floor_f16 v5.l, vcc_lo
// GFX11: encoding: [0x6a,0xb6,0x0a,0x7e]

v_floor_f16 v5, vcc_hi
v_floor_f16 v5.l, vcc_hi
// GFX11: encoding: [0x6b,0xb6,0x0a,0x7e]

v_floor_f16 v5, ttmp15
v_floor_f16 v5.l, ttmp15
// GFX11: encoding: [0x7b,0xb6,0x0a,0x7e]

v_floor_f16 v5, m0
v_floor_f16 v5.l, m0
// GFX11: encoding: [0x7d,0xb6,0x0a,0x7e]

v_floor_f16 v5, exec_lo
v_floor_f16 v5.l, exec_lo
// GFX11: encoding: [0x7e,0xb6,0x0a,0x7e]

v_floor_f16 v5, exec_hi
v_floor_f16 v5.l, exec_hi
// GFX11: encoding: [0x7f,0xb6,0x0a,0x7e]

v_floor_f16 v5, null
v_floor_f16 v5.l, null
// GFX11: encoding: [0x7c,0xb6,0x0a,0x7e]

v_floor_f16 v5, -1
v_floor_f16 v5.l, -1
// GFX11: encoding: [0xc1,0xb6,0x0a,0x7e]

v_floor_f16 v5, 0.5
// GFX11: encoding: [0xf0,0xb6,0x0a,0x7e]
v_floor_f16 v127.l, 0.5
// GFX11: encoding: [0xf0,0xb6,0xfe,0x7e]

v_floor_f16 v5, src_scc
// GFX11: encoding: [0xfd,0xb6,0x0a,0x7e]
v_floor_f16 v5.h, src_scc
// GFX11: encoding: [0xfd,0xb6,0x0a,0x7f]

v_floor_f16 v127, 0xfe0b
// GFX11: encoding: [0xff,0xb6,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
v_floor_f16 v127.h, 0xfe0b
// GFX11: encoding: [0xff,0xb6,0xfe,0x7f,0x0b,0xfe,0x00,0x00]

v_floor_f32 v5, v1
// GFX11: encoding: [0x01,0x49,0x0a,0x7e]
Expand Down
Loading