Skip to content

Commit c923d90

Browse files
kosarev authored and ampandey-1995 committed
[AMDGPU][True16] Support V_FLOOR_F16. (llvm#78446)
1 parent c7e8c96 commit c923d90

16 files changed: +371 / -81 lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5280,7 +5280,8 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
52805280
return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
52815281
: AMDGPU::V_CEIL_F16_fake16_e64;
52825282
case AMDGPU::S_FLOOR_F16:
5283-
return AMDGPU::V_FLOOR_F16_fake16_e64;
5283+
return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
5284+
: AMDGPU::V_FLOOR_F16_fake16_e64;
52845285
case AMDGPU::S_TRUNC_F16:
52855286
return AMDGPU::V_TRUNC_F16_fake16_e64;
52865287
case AMDGPU::S_RNDNE_F16:

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -881,6 +881,7 @@ defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16"
881881
defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
882882
defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
883883
defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
884+
defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
884885
defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
885886
defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
886887
defm V_CEIL_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir

Lines changed: 64 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=VI %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GCN,GFX11 %s
4+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GCN,GFX11-FAKE16 %s
35

46
---
57
name: ffloor_s16_ss
@@ -19,6 +21,15 @@ body: |
1921
; VI-NEXT: [[FFLOOR:%[0-9]+]]:sreg_32(s16) = G_FFLOOR [[TRUNC]]
2022
; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FFLOOR]](s16)
2123
; VI-NEXT: $sgpr0 = COPY [[COPY1]](s32)
24+
;
25+
; GCN-LABEL: name: ffloor_s16_ss
26+
; GCN: liveins: $sgpr0
27+
; GCN-NEXT: {{ $}}
28+
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
29+
; GCN-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
30+
; GCN-NEXT: [[FFLOOR:%[0-9]+]]:sreg_32(s16) = G_FFLOOR [[TRUNC]]
31+
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FFLOOR]](s16)
32+
; GCN-NEXT: $sgpr0 = COPY [[COPY1]](s32)
2233
%0:sgpr(s32) = COPY $sgpr0
2334
%1:sgpr(s16) = G_TRUNC %0
2435
%2:sgpr(s16) = G_FFLOOR %1
@@ -40,8 +51,24 @@ body: |
4051
; VI: liveins: $vgpr0
4152
; VI-NEXT: {{ $}}
4253
; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
43-
; VI-NEXT: %2:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
44-
; VI-NEXT: $vgpr0 = COPY %2
54+
; VI-NEXT: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
55+
; VI-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_e64_]]
56+
;
57+
; GFX11-LABEL: name: ffloor_s16_vv
58+
; GFX11: liveins: $vgpr0
59+
; GFX11-NEXT: {{ $}}
60+
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
61+
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
62+
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
63+
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
64+
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
65+
;
66+
; GFX11-FAKE16-LABEL: name: ffloor_s16_vv
67+
; GFX11-FAKE16: liveins: $vgpr0
68+
; GFX11-FAKE16-NEXT: {{ $}}
69+
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
70+
; GFX11-FAKE16-NEXT: [[V_FLOOR_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
71+
; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_fake16_e64_]]
4572
%0:vgpr(s32) = COPY $vgpr0
4673
%1:vgpr(s16) = G_TRUNC %0
4774
%2:vgpr(s16) = G_FFLOOR %1
@@ -63,8 +90,23 @@ body: |
6390
; VI: liveins: $sgpr0
6491
; VI-NEXT: {{ $}}
6592
; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
66-
; VI-NEXT: %2:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
67-
; VI-NEXT: $vgpr0 = COPY %2
93+
; VI-NEXT: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
94+
; VI-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_e64_]]
95+
;
96+
; GFX11-LABEL: name: ffloor_s16_vs
97+
; GFX11: liveins: $sgpr0
98+
; GFX11-NEXT: {{ $}}
99+
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
100+
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
101+
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
102+
; GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
103+
;
104+
; GFX11-FAKE16-LABEL: name: ffloor_s16_vs
105+
; GFX11-FAKE16: liveins: $sgpr0
106+
; GFX11-FAKE16-NEXT: {{ $}}
107+
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
108+
; GFX11-FAKE16-NEXT: [[V_FLOOR_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
109+
; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_fake16_e64_]]
68110
%0:sgpr(s32) = COPY $sgpr0
69111
%1:sgpr(s16) = G_TRUNC %0
70112
%2:vgpr(s16) = G_FFLOOR %1
@@ -86,8 +128,24 @@ body: |
86128
; VI: liveins: $vgpr0
87129
; VI-NEXT: {{ $}}
88130
; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
89-
; VI-NEXT: %3:vgpr_32 = nofpexcept V_FLOOR_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
90-
; VI-NEXT: $vgpr0 = COPY %3
131+
; VI-NEXT: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
132+
; VI-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_e64_]]
133+
;
134+
; GFX11-LABEL: name: ffloor_fneg_s16_vv
135+
; GFX11: liveins: $vgpr0
136+
; GFX11-NEXT: {{ $}}
137+
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
138+
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
139+
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
140+
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
141+
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
142+
;
143+
; GFX11-FAKE16-LABEL: name: ffloor_fneg_s16_vv
144+
; GFX11-FAKE16: liveins: $vgpr0
145+
; GFX11-FAKE16-NEXT: {{ $}}
146+
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
147+
; GFX11-FAKE16-NEXT: [[V_FLOOR_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_fake16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
148+
; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_fake16_e64_]]
91149
%0:vgpr(s32) = COPY $vgpr0
92150
%1:vgpr(s16) = G_TRUNC %0
93151
%2:vgpr(s16) = G_FNEG %1

llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,3 +78,24 @@ body: |
7878
%2:sreg_32 = COPY %1:vgpr_32
7979
%3:sreg_32 = nofpexcept S_CEIL_F16 killed %2:sreg_32, implicit $mode
8080
...
81+
82+
---
83+
name: floor_f16
84+
body: |
85+
bb.0:
86+
; REAL16-LABEL: name: floor_f16
87+
; REAL16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
88+
; REAL16-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
89+
; REAL16-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
90+
; REAL16-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, implicit $mode, implicit $exec
91+
;
92+
; FAKE16-LABEL: name: floor_f16
93+
; FAKE16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
94+
; FAKE16-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
95+
; FAKE16-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
96+
; FAKE16-NEXT: [[V_FLOOR_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_fake16_e64 0, [[V_CVT_F32_U32_e64_]], 0, 0, implicit $mode, implicit $exec
97+
%0:vgpr_32 = IMPLICIT_DEF
98+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
99+
%2:sreg_32 = COPY %1:vgpr_32
100+
%3:sreg_32 = nofpexcept S_FLOOR_F16 killed %2:sreg_32, implicit $mode
101+
...

llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll

Lines changed: 55 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
22
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=SI %s
33
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI %s
4-
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
4+
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
5+
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s
56

67
declare half @llvm.floor.f16(half %a)
78
declare <2 x half> @llvm.floor.v2f16(<2 x half> %a)
@@ -59,11 +60,31 @@ define amdgpu_kernel void @floor_f16(
5960
; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0
6061
; GFX11-NEXT: s_mov_b32 s5, s1
6162
; GFX11-NEXT: s_waitcnt vmcnt(0)
62-
; GFX11-NEXT: v_floor_f16_e32 v0, v0
63+
; GFX11-NEXT: v_floor_f16_e32 v0.l, v0.l
6364
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
6465
; GFX11-NEXT: s_nop 0
6566
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
6667
; GFX11-NEXT: s_endpgm
68+
;
69+
; GFX11-FAKE16-LABEL: floor_f16:
70+
; GFX11-FAKE16: ; %bb.0: ; %entry
71+
; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
72+
; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1
73+
; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
74+
; GFX11-FAKE16-NEXT: s_mov_b32 s10, s6
75+
; GFX11-FAKE16-NEXT: s_mov_b32 s11, s7
76+
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
77+
; GFX11-FAKE16-NEXT: s_mov_b32 s8, s2
78+
; GFX11-FAKE16-NEXT: s_mov_b32 s9, s3
79+
; GFX11-FAKE16-NEXT: s_mov_b32 s4, s0
80+
; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[8:11], 0
81+
; GFX11-FAKE16-NEXT: s_mov_b32 s5, s1
82+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
83+
; GFX11-FAKE16-NEXT: v_floor_f16_e32 v0, v0
84+
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
85+
; GFX11-FAKE16-NEXT: s_nop 0
86+
; GFX11-FAKE16-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
87+
; GFX11-FAKE16-NEXT: s_endpgm
6788
ptr addrspace(1) %r,
6889
ptr addrspace(1) %a) {
6990
entry:
@@ -143,14 +164,43 @@ define amdgpu_kernel void @floor_v2f16(
143164
; GFX11-NEXT: s_mov_b32 s5, s1
144165
; GFX11-NEXT: s_waitcnt vmcnt(0)
145166
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
146-
; GFX11-NEXT: v_floor_f16_e32 v0, v0
167+
; GFX11-NEXT: v_floor_f16_e32 v0.l, v0.l
168+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
169+
; GFX11-NEXT: v_mov_b16_e32 v0.h, v1.l
170+
; GFX11-NEXT: v_mov_b16_e32 v1.l, v0.l
147171
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
148-
; GFX11-NEXT: v_floor_f16_e32 v1, v1
149-
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
172+
; GFX11-NEXT: v_floor_f16_e32 v0.h, v0.h
173+
; GFX11-NEXT: v_mov_b16_e32 v0.l, v0.h
174+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
175+
; GFX11-NEXT: v_pack_b32_f16 v0, v1, v0
150176
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
151177
; GFX11-NEXT: s_nop 0
152178
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
153179
; GFX11-NEXT: s_endpgm
180+
;
181+
; GFX11-FAKE16-LABEL: floor_v2f16:
182+
; GFX11-FAKE16: ; %bb.0: ; %entry
183+
; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
184+
; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1
185+
; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
186+
; GFX11-FAKE16-NEXT: s_mov_b32 s10, s6
187+
; GFX11-FAKE16-NEXT: s_mov_b32 s11, s7
188+
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
189+
; GFX11-FAKE16-NEXT: s_mov_b32 s8, s2
190+
; GFX11-FAKE16-NEXT: s_mov_b32 s9, s3
191+
; GFX11-FAKE16-NEXT: s_mov_b32 s4, s0
192+
; GFX11-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
193+
; GFX11-FAKE16-NEXT: s_mov_b32 s5, s1
194+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
195+
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
196+
; GFX11-FAKE16-NEXT: v_floor_f16_e32 v0, v0
197+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
198+
; GFX11-FAKE16-NEXT: v_floor_f16_e32 v1, v1
199+
; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
200+
; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
201+
; GFX11-FAKE16-NEXT: s_nop 0
202+
; GFX11-FAKE16-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
203+
; GFX11-FAKE16-NEXT: s_endpgm
154204
ptr addrspace(1) %r,
155205
ptr addrspace(1) %a) {
156206
entry:

llvm/test/MC/AMDGPU/gfx11_asm_vop1-fake16.s

Lines changed: 48 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,54 @@
11
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s
22
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s
33

4+
v_floor_f16 v5, v1
5+
// GFX11: encoding: [0x01,0xb7,0x0a,0x7e]
6+
7+
v_floor_f16 v5, v127
8+
// GFX11: encoding: [0x7f,0xb7,0x0a,0x7e]
9+
10+
v_floor_f16 v5, s1
11+
// GFX11: encoding: [0x01,0xb6,0x0a,0x7e]
12+
13+
v_floor_f16 v5, s105
14+
// GFX11: encoding: [0x69,0xb6,0x0a,0x7e]
15+
16+
v_floor_f16 v5, vcc_lo
17+
// GFX11: encoding: [0x6a,0xb6,0x0a,0x7e]
18+
19+
v_floor_f16 v5, vcc_hi
20+
// GFX11: encoding: [0x6b,0xb6,0x0a,0x7e]
21+
22+
v_floor_f16 v5, ttmp15
23+
// GFX11: encoding: [0x7b,0xb6,0x0a,0x7e]
24+
25+
v_floor_f16 v5, m0
26+
// GFX11: encoding: [0x7d,0xb6,0x0a,0x7e]
27+
28+
v_floor_f16 v5, exec_lo
29+
// GFX11: encoding: [0x7e,0xb6,0x0a,0x7e]
30+
31+
v_floor_f16 v5, exec_hi
32+
// GFX11: encoding: [0x7f,0xb6,0x0a,0x7e]
33+
34+
v_floor_f16 v5, null
35+
// GFX11: encoding: [0x7c,0xb6,0x0a,0x7e]
36+
37+
v_floor_f16 v5, -1
38+
// GFX11: encoding: [0xc1,0xb6,0x0a,0x7e]
39+
40+
v_floor_f16 v5, 0.5
41+
// GFX11: encoding: [0xf0,0xb6,0x0a,0x7e]
42+
43+
v_floor_f16 v5, src_scc
44+
// GFX11: encoding: [0xfd,0xb6,0x0a,0x7e]
45+
46+
v_floor_f16 v127, 0xfe0b
47+
// GFX11: encoding: [0xff,0xb6,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
48+
49+
v_floor_f32 v5, v1
50+
// GFX11: encoding: [0x01,0x49,0x0a,0x7e]
51+
452
v_ceil_f16 v5, v1
553
// GFX11: encoding: [0x01,0xb9,0x0a,0x7e]
654

llvm/test/MC/AMDGPU/gfx11_asm_vop1.s

Lines changed: 24 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1906,50 +1906,56 @@ v_ffbl_b32 v5, src_scc
19061906
v_ffbl_b32 v255, 0xaf123456
19071907
// GFX11: encoding: [0xff,0x74,0xfe,0x7f,0x56,0x34,0x12,0xaf]
19081908

1909-
v_floor_f16 v5, v1
1909+
v_floor_f16 v5.l, v1.l
19101910
// GFX11: encoding: [0x01,0xb7,0x0a,0x7e]
19111911

1912-
v_floor_f16 v5, v127
1912+
v_floor_f16 v5.l, v127.l
19131913
// GFX11: encoding: [0x7f,0xb7,0x0a,0x7e]
19141914

1915-
v_floor_f16 v5, s1
1915+
v_floor_f16 v5.l, v1.h
1916+
// GFX11: encoding: [0x81,0xb7,0x0a,0x7e]
1917+
1918+
v_floor_f16 v5.l, v127.h
1919+
// GFX11: encoding: [0xff,0xb7,0x0a,0x7e]
1920+
1921+
v_floor_f16 v5.l, s1
19161922
// GFX11: encoding: [0x01,0xb6,0x0a,0x7e]
19171923

1918-
v_floor_f16 v5, s105
1924+
v_floor_f16 v5.l, s105
19191925
// GFX11: encoding: [0x69,0xb6,0x0a,0x7e]
19201926

1921-
v_floor_f16 v5, vcc_lo
1927+
v_floor_f16 v5.l, vcc_lo
19221928
// GFX11: encoding: [0x6a,0xb6,0x0a,0x7e]
19231929

1924-
v_floor_f16 v5, vcc_hi
1930+
v_floor_f16 v5.l, vcc_hi
19251931
// GFX11: encoding: [0x6b,0xb6,0x0a,0x7e]
19261932

1927-
v_floor_f16 v5, ttmp15
1933+
v_floor_f16 v5.l, ttmp15
19281934
// GFX11: encoding: [0x7b,0xb6,0x0a,0x7e]
19291935

1930-
v_floor_f16 v5, m0
1936+
v_floor_f16 v5.l, m0
19311937
// GFX11: encoding: [0x7d,0xb6,0x0a,0x7e]
19321938

1933-
v_floor_f16 v5, exec_lo
1939+
v_floor_f16 v5.l, exec_lo
19341940
// GFX11: encoding: [0x7e,0xb6,0x0a,0x7e]
19351941

1936-
v_floor_f16 v5, exec_hi
1942+
v_floor_f16 v5.l, exec_hi
19371943
// GFX11: encoding: [0x7f,0xb6,0x0a,0x7e]
19381944

1939-
v_floor_f16 v5, null
1945+
v_floor_f16 v5.l, null
19401946
// GFX11: encoding: [0x7c,0xb6,0x0a,0x7e]
19411947

1942-
v_floor_f16 v5, -1
1948+
v_floor_f16 v5.l, -1
19431949
// GFX11: encoding: [0xc1,0xb6,0x0a,0x7e]
19441950

1945-
v_floor_f16 v5, 0.5
1946-
// GFX11: encoding: [0xf0,0xb6,0x0a,0x7e]
1951+
v_floor_f16 v127.l, 0.5
1952+
// GFX11: encoding: [0xf0,0xb6,0xfe,0x7e]
19471953

1948-
v_floor_f16 v5, src_scc
1949-
// GFX11: encoding: [0xfd,0xb6,0x0a,0x7e]
1954+
v_floor_f16 v5.h, src_scc
1955+
// GFX11: encoding: [0xfd,0xb6,0x0a,0x7f]
19501956

1951-
v_floor_f16 v127, 0xfe0b
1952-
// GFX11: encoding: [0xff,0xb6,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
1957+
v_floor_f16 v127.h, 0xfe0b
1958+
// GFX11: encoding: [0xff,0xb6,0xfe,0x7f,0x0b,0xfe,0x00,0x00]
19531959

19541960
v_floor_f32 v5, v1
19551961
// GFX11: encoding: [0x01,0x49,0x0a,0x7e]

0 commit comments

Comments
 (0)