Skip to content

Commit 4feed6c

Browse files
pravinjagtap authored and arsenm committed
AMDGPU: Add V_CVT_F32_BF16 for gfx950
1 parent ca1b35a commit 4feed6c

File tree

6 files changed

+206
-3
lines changed

6 files changed

+206
-3
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,12 @@ def FeatureRealTrue16Insts : SubtargetFeature<"real-true16",
438438
"Use true 16-bit registers"
439439
>;
440440

441+
// Subtarget feature selected by the name "bf16-cvt-insts". The remaining
// arguments name the HasBF16ConversionInsts subtarget field, the value "true"
// given for it, and the human-readable description string.
def FeatureBF16ConversionInsts : SubtargetFeature<"bf16-cvt-insts",
442+
"HasBF16ConversionInsts",
443+
"true",
444+
"Has bf16 conversion instructions"
445+
>;
446+
441447
def FeatureVOP3P : SubtargetFeature<"vop3p",
442448
"HasVOP3PInsts",
443449
"true",
@@ -1504,7 +1510,8 @@ def FeatureISAVersion9_5_Common : FeatureSet<
15041510
FeatureFP8ConversionInsts,
15051511
FeatureCvtFP8VOP1Bug,
15061512
FeatureGFX950Insts,
1507-
FeaturePrngInst
1513+
FeaturePrngInst,
1514+
FeatureBF16ConversionInsts
15081515
])>;
15091516

15101517
def FeatureISAVersion9_4_0 : FeatureSet<
@@ -2144,6 +2151,9 @@ def UseFakeTrue16Insts : True16PredicateClass<"Subtarget->hasTrue16BitInsts() &&
21442151
// FIXME When we default to RealTrue16 instead of Fake, change the line as follows.
21452152
// AssemblerPredicate<(all_of FeatureTrue16BitInsts, (not FeatureRealTrue16Insts))>;
21462153

2154+
// Predicate whose condition string is Subtarget->hasBF16ConversionInsts();
// the assembler-side predicate requires FeatureBF16ConversionInsts.
def HasBF16ConversionInsts : Predicate<"Subtarget->hasBF16ConversionInsts()">,
2155+
AssemblerPredicate<(all_of FeatureBF16ConversionInsts)>;
2156+
21472157
def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
21482158
AssemblerPredicate<(all_of FeatureVOP3P)>;
21492159

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ class AMDGPUSubtarget {
5151
bool Has16BitInsts = false;
5252
bool HasTrue16BitInsts = false;
5353
bool EnableRealTrue16Insts = false;
54+
bool HasBF16ConversionInsts = false;
5455
bool HasMadMixInsts = false;
5556
bool HasMadMacF32Insts = false;
5657
bool HasDsSrc2Insts = false;
@@ -166,6 +167,10 @@ class AMDGPUSubtarget {
166167
// supported and the support for fake True16 instructions is removed.
167168
bool useRealTrue16Insts() const;
168169

170+
/// Returns the value of the HasBF16ConversionInsts member.
bool hasBF16ConversionInsts() const {
171+
return HasBF16ConversionInsts;
172+
}
173+
169174
bool hasMadMixInsts() const {
170175
return HasMadMixInsts;
171176
}

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2774,6 +2774,7 @@ def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
27742774
def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
27752775
def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
27762776
def VOP_I64_I64 : VOPProfile <[i64, i64, untyped, untyped]>;
2777+
// VOP profile with the type list [f32, bf16, untyped, untyped]. By analogy
// with the neighbouring VOP_F32_F16, this is presumably an f32 result from a
// single bf16 source -- TODO confirm against the VOPProfile definition.
def VOP_F32_BF16 : VOPProfile <[f32, bf16, untyped, untyped]>;
27772778

27782779
def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
27792780
def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,9 @@ let OtherPredicates = [UseRealTrue16Insts] in
311311
let OtherPredicates = [UseFakeTrue16Insts] in
312312
defm V_CVT_F32_F16_fake16 : VOP1Inst <"v_cvt_f32_f16_fake16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
313313

314+
// Defines v_cvt_f32_bf16 with the VOP_F32_BF16 profile, gated on
// HasBF16ConversionInsts. No selection node is passed here, unlike
// V_CVT_F32_F16_fake16 above, which passes any_fpextend.
let SubtargetPredicate = HasBF16ConversionInsts in
315+
defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>;
316+
314317
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
315318
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
316319
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
@@ -1514,6 +1517,9 @@ defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
15141517
let AssemblerPredicate = isGFX940Plus in
15151518
defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>;
15161519

1520+
// Real gfx9 encoding for V_CVT_F32_BF16, instantiated with the argument 0x5b.
// NOTE(review): the pseudo instruction is gated on HasBF16ConversionInsts,
// while this encoding is gated on HasGFX950Insts -- confirm the two
// predicates are intended to differ.
let AssemblerPredicate = HasGFX950Insts in
1521+
defm V_CVT_F32_BF16 : VOP1_Real_gfx9 <0x5b>;
1522+
15171523
defm V_CVT_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x54>;
15181524
defm V_CVT_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>;
15191525
defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>;

llvm/test/MC/AMDGPU/gfx950_asm_vop1.s

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s
1+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s
2+
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck -check-prefix=GFX940-ERR --strict-whitespace %s
23

34
v_prng_b32 v5, v1
45
// GFX950: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0xb1,0x0a,0x7e]
@@ -55,3 +56,75 @@ v_prng_b32 v5, src_scc
5556
v_prng_b32 v255, 0xaf123456
5657
// GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf]
5758
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
59+
60+
v_cvt_f32_bf16 v5, v1
61+
// GFX950: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xb7,0x0a,0x7e]
62+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
63+
64+
v_cvt_f32_bf16 v5, v127
65+
// GFX950: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xb7,0x0a,0x7e]
66+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
67+
68+
v_cvt_f32_bf16 v5, s1
69+
// GFX950: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xb6,0x0a,0x7e]
70+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
71+
72+
v_cvt_f32_bf16 v5, vcc_lo
73+
// GFX950: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xb6,0x0a,0x7e]
74+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
75+
76+
v_cvt_f32_bf16 v5, vcc_hi
77+
// GFX950: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xb6,0x0a,0x7e]
78+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
79+
80+
v_cvt_f32_bf16 v5, ttmp15
81+
// GFX950: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xb6,0x0a,0x7e]
82+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
83+
84+
v_cvt_f32_bf16 v5, m0
85+
// GFX950: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7c,0xb6,0x0a,0x7e]
86+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
87+
88+
v_cvt_f32_bf16 v5, exec_lo
89+
// GFX950: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xb6,0x0a,0x7e]
90+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
91+
92+
v_cvt_f32_bf16 v5, exec_hi
93+
// GFX950: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xb6,0x0a,0x7e]
94+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
95+
96+
v_cvt_f32_bf16 v5, -1
97+
// GFX950: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e]
98+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
99+
100+
v_cvt_f32_bf16 v5, 0.5
101+
// GFX950: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xb6,0x0a,0x7e]
102+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
103+
104+
v_cvt_f32_bf16 v5, src_scc
105+
// GFX950: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7e]
106+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
107+
108+
v_cvt_f32_bf16 v127, 0x8000
109+
// GFX950: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00]
110+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
111+
112+
v_cvt_f32_bf16 v5, -v1
113+
// GFX950: v_cvt_f32_bf16_e64 v5, -v1 ; encoding: [0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20]
114+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
115+
116+
v_cvt_f32_bf16 v5, |v1|
117+
// GFX950: v_cvt_f32_bf16_e64 v5, |v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00]
118+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
119+
120+
v_cvt_f32_bf16 v5, -|v1|
121+
// GFX950: v_cvt_f32_bf16_e64 v5, -|v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20]
122+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
123+
124+
v_cvt_f32_bf16 v5, v1 clamp mul:2
125+
// GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp mul:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x08]
126+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
127+
128+
v_cvt_f32_bf16_e64 v5, v1 clamp div:2
129+
// GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp div:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18]
130+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop1.txt

Lines changed: 109 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,112 @@
4040
0xfd,0xb0,0x0a,0x7e
4141

4242
# GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf]
43-
0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf
43+
0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf
44+
45+
# GFX950: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xb7,0x0a,0x7e]
46+
0x01,0xb7,0x0a,0x7e
47+
48+
# GFX950: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xb7,0x0a,0x7e]
49+
0x7f,0xb7,0x0a,0x7e
50+
51+
# GFX950: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xb6,0x0a,0x7e]
52+
0x01,0xb6,0x0a,0x7e
53+
54+
# GFX950: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xb6,0x0a,0x7e]
55+
0x6a,0xb6,0x0a,0x7e
56+
57+
# GFX950: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xb6,0x0a,0x7e]
58+
0x6b,0xb6,0x0a,0x7e
59+
60+
# GFX950: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xb6,0x0a,0x7e]
61+
0x7b,0xb6,0x0a,0x7e
62+
63+
# GFX950: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7c,0xb6,0x0a,0x7e]
64+
0x7c,0xb6,0x0a,0x7e
65+
66+
# GFX950: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xb6,0x0a,0x7e]
67+
0x7e,0xb6,0x0a,0x7e
68+
69+
# GFX950: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xb6,0x0a,0x7e]
70+
0x7f,0xb6,0x0a,0x7e
71+
72+
# GFX950: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e]
73+
0xc1,0xb6,0x0a,0x7e
74+
75+
# GFX950: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xb6,0x0a,0x7e]
76+
0xf0,0xb6,0x0a,0x7e
77+
78+
# GFX950: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7e]
79+
0xfd,0xb6,0x0a,0x7e
80+
81+
# GFX950: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00]
82+
0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00
83+
84+
# GFX950: v_cvt_f32_bf16_e64 v5, -v1 ; encoding: [0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20]
85+
0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20
86+
87+
# GFX950: v_cvt_f32_bf16_e64 v5, |v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00]
88+
0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00
89+
90+
# GFX950: v_cvt_f32_bf16_e64 v5, -|v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20]
91+
0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20
92+
93+
# GFX950: v_cvt_f32_bf16_e64 v5, 0.5 clamp mul:2 ; encoding: [0x05,0x80,0x9b,0xd1,0xf0,0x00,0x00,0x08]
94+
0x05,0x80,0x9b,0xd1,0xf0,0x00,0x00,0x08
95+
96+
# GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp div:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18]
97+
0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18
98+
99+
# GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp div:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18]
100+
0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18
101+
102+
# GFX950: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xb7,0x0a,0x7e]
103+
0x01,0xb7,0x0a,0x7e
104+
105+
# GFX950: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xb7,0x0a,0x7e]
106+
0x7f,0xb7,0x0a,0x7e
107+
108+
# GFX950: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xb6,0x0a,0x7e]
109+
0x01,0xb6,0x0a,0x7e
110+
111+
# GFX950: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xb6,0x0a,0x7e]
112+
0x6a,0xb6,0x0a,0x7e
113+
114+
# GFX950: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xb6,0x0a,0x7e]
115+
0x6b,0xb6,0x0a,0x7e
116+
117+
# GFX950: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xb6,0x0a,0x7e]
118+
0x7b,0xb6,0x0a,0x7e
119+
120+
# GFX950: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7c,0xb6,0x0a,0x7e]
121+
0x7c,0xb6,0x0a,0x7e
122+
123+
# GFX950: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xb6,0x0a,0x7e]
124+
0x7e,0xb6,0x0a,0x7e
125+
126+
# GFX950: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xb6,0x0a,0x7e]
127+
0x7f,0xb6,0x0a,0x7e
128+
129+
# GFX950: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e]
130+
0xc1,0xb6,0x0a,0x7e
131+
132+
# GFX950: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xb6,0x0a,0x7e]
133+
0xf0,0xb6,0x0a,0x7e
134+
135+
# GFX950: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7e]
136+
0xfd,0xb6,0x0a,0x7e
137+
138+
# GFX950: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00]
139+
0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00
140+
141+
# GFX950: v_cvt_f32_bf16_e64 v5, -v1 ; encoding: [0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20]
142+
0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20
143+
144+
# GFX950: v_cvt_f32_bf16_e64 v5, |v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00]
145+
0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00
146+
147+
# GFX950: v_cvt_f32_bf16_e64 v5, -|v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20]
148+
0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20
149+
150+
# GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp mul:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x08]
151+
0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x08

0 commit comments

Comments
 (0)