Skip to content

Commit 0375ef0

Browse files
authored
[Clang][AMDGPU] Add __builtin_amdgcn_cvt_off_f32_i4 (llvm#133741)
This built-in maps to `V_CVT_OFF_F32_I4` which treats its input as a 4-bit signed integer and returns `0.0625f * src`. SWDEV-518861
1 parent 540dd89 commit 0375ef0

File tree

11 files changed

+279
-1
lines changed

11 files changed

+279
-1
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ Non-comprehensive list of changes in this release
167167

168168
- Support parsing the `cc` operand modifier and alias it to the `c` modifier (#GH127719).
169169
- Added `__builtin_elementwise_exp10`.
170+
- For AMDGPU targets, added `__builtin_amdgcn_cvt_off_f32_i4` that maps to the `v_cvt_off_f32_i4` instruction.
170171

171172
New Compiler Flags
172173
------------------

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc")
140140
BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc")
141141
BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc")
142142
BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc")
143+
BUILTIN(__builtin_amdgcn_cvt_off_f32_i4, "fi", "nc")
143144
BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc")
144145
BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc")
145146
BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc")
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 \
3+
// RUN: -emit-llvm -o - | FileCheck %s
4+
5+
// CHECK-LABEL: @test_builtin_amdgcn_cvt_off_f32_i4_ui(
6+
// CHECK-NEXT: entry:
7+
// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
8+
// CHECK-NEXT: store i32 [[N:%.*]], ptr addrspace(5) [[N_ADDR]], align 4
9+
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
10+
// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.cvt.off.f32.i4(i32 [[TMP0]])
11+
// CHECK-NEXT: ret float [[TMP1]]
12+
//
13+
float test_builtin_amdgcn_cvt_off_f32_i4_ui(unsigned n) {
14+
return __builtin_amdgcn_cvt_off_f32_i4(n);
15+
}
16+
17+
// CHECK-LABEL: @test_builtin_amdgcn_cvt_off_f32_i4_i(
18+
// CHECK-NEXT: entry:
19+
// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
20+
// CHECK-NEXT: store i32 [[N:%.*]], ptr addrspace(5) [[N_ADDR]], align 4
21+
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
22+
// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.cvt.off.f32.i4(i32 [[TMP0]])
23+
// CHECK-NEXT: ret float [[TMP1]]
24+
//
25+
float test_builtin_amdgcn_cvt_off_f32_i4_i(int n) {
26+
return __builtin_amdgcn_cvt_off_f32_i4(n);
27+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
// RUN: %clang_cc1 -triple amdgcn-- -verify -S -o - %s
2+
3+
void test_builtin_amdgcn_cvt_off_f32_i4(int n) {
4+
struct A{ unsigned x; } a;
5+
__builtin_amdgcn_cvt_off_f32_i4(n, n); // expected-error {{too many arguments to function call, expected 1, have 2}}
6+
__builtin_amdgcn_cvt_off_f32_i4(); // expected-error {{too few arguments to function call, expected 1, have 0}}
7+
__builtin_amdgcn_cvt_off_f32_i4(a); // expected-error {{passing '__private struct A' to parameter of incompatible type 'int'}}
8+
}

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3375,6 +3375,12 @@ def int_amdgcn_cvt_sr_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32">,
33753375
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
33763376
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
33773377

3378+
// llvm.amdgcn.cvt.off.f32.i4 int srcA
3379+
def int_amdgcn_cvt_off_f32_i4: ClangBuiltin<"__builtin_amdgcn_cvt_off_f32_i4">,
3380+
DefaultAttrsIntrinsic<[llvm_float_ty],
3381+
[llvm_i32_ty],
3382+
[IntrNoMem, IntrSpeculatable]>;
3383+
33783384
//===----------------------------------------------------------------------===//
33793385
// gfx950 intrinsics
33803386
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6042,6 +6042,7 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
60426042
// TODO: Handle more intrinsics
60436043
switch (IntrinsicID) {
60446044
case Intrinsic::amdgcn_cubeid:
6045+
case Intrinsic::amdgcn_cvt_off_f32_i4:
60456046
return true;
60466047

60476048
case Intrinsic::amdgcn_frexp_mant: {

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -729,6 +729,29 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
729729

730730
break;
731731
}
732+
case Intrinsic::amdgcn_cvt_off_f32_i4: {
733+
Value* Arg = II.getArgOperand(0);
734+
Type *Ty = II.getType();
735+
736+
if (isa<PoisonValue>(Arg))
737+
return IC.replaceInstUsesWith(II, PoisonValue::get(Ty));
738+
739+
if(IC.getSimplifyQuery().isUndefValue(Arg))
740+
return IC.replaceInstUsesWith(II, Constant::getNullValue(Ty));
741+
742+
ConstantInt *CArg = dyn_cast<ConstantInt>(II.getArgOperand(0));
743+
if (!CArg)
744+
break;
745+
746+
// Tabulated 0.0625 * (sext (CArg & 0xf)).
747+
constexpr size_t ResValsSize = 16;
748+
static constexpr float ResVals[ResValsSize] = {
749+
0.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375,
750+
-0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625};
751+
Constant *Res =
752+
ConstantFP::get(Ty, ResVals[CArg->getZExtValue() & (ResValsSize - 1)]);
753+
return IC.replaceInstUsesWith(II, Res);
754+
}
732755
case Intrinsic::amdgcn_ubfe:
733756
case Intrinsic::amdgcn_sbfe: {
734757
// Decompose simple cases into standard shifts.

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4585,6 +4585,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
45854585
case Intrinsic::amdgcn_dot4_f32_bf8_bf8:
45864586
case Intrinsic::amdgcn_cvt_f32_fp8:
45874587
case Intrinsic::amdgcn_cvt_f32_bf8:
4588+
case Intrinsic::amdgcn_cvt_off_f32_i4:
45884589
case Intrinsic::amdgcn_cvt_pk_f32_fp8:
45894590
case Intrinsic::amdgcn_cvt_pk_f32_bf8:
45904591
case Intrinsic::amdgcn_cvt_pk_fp8_f32:

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>;
317317
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
318318
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
319319
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
320-
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
320+
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32, int_amdgcn_cvt_off_f32_i4>;
321321
} // End ReadsModeReg = 0, mayRaiseFPException = 0
322322
} // End SchedRW = [WriteFloatCvt]
323323

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc --global-isel=0 -mtriple=amdgcn -mcpu=tahiti %s -o - | FileCheck %s
3+
; RUN: llc --global-isel=1 -mtriple=amdgcn -mcpu=tahiti %s -o - | FileCheck %s
4+
; RUN: llc --global-isel=0 -mtriple=amdgcn -mcpu=tonga %s -o - | FileCheck %s
5+
; RUN: llc --global-isel=1 -mtriple=amdgcn -mcpu=tonga %s -o - | FileCheck %s
6+
; RUN: llc --global-isel=0 -mtriple=amdgcn -mcpu=gfx90a %s -o - | FileCheck %s
7+
; RUN: llc --global-isel=1 -mtriple=amdgcn -mcpu=gfx90a %s -o - | FileCheck %s
8+
9+
declare float @llvm.amdgcn.cvt.off.f32.i4(i32)
10+
11+
define amdgpu_cs float @cvt_var(i32 %a) {
12+
; CHECK-LABEL: cvt_var:
13+
; CHECK: ; %bb.0:
14+
; CHECK-NEXT: v_cvt_off_f32_i4_e32 v0, v0
15+
; CHECK-NEXT: ; return to shader part epilog
16+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 %a)
17+
ret float %ret
18+
}
19+
20+
define amdgpu_cs float @cvt_imm() {
21+
; CHECK-LABEL: cvt_imm:
22+
; CHECK: ; %bb.0:
23+
; CHECK-NEXT: v_cvt_off_f32_i4_e32 v0, 4
24+
; CHECK-NEXT: ; return to shader part epilog
25+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 4)
26+
ret float %ret
27+
}
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine < %s | FileCheck %s
3+
4+
@gv = constant i32 0
5+
6+
define float @cvt_var(i32 %a) {
7+
; CHECK-LABEL: define float @cvt_var(
8+
; CHECK-SAME: i32 [[A:%.*]]) {
9+
; CHECK-NEXT: [[RET:%.*]] = call float @llvm.amdgcn.cvt.off.f32.i4(i32 [[A]])
10+
; CHECK-NEXT: ret float [[RET]]
11+
;
12+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 %a)
13+
ret float %ret
14+
}
15+
16+
define float @cvt_imm_0() {
17+
; CHECK-LABEL: define float @cvt_imm_0() {
18+
; CHECK-NEXT: ret float 0.000000e+00
19+
;
20+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 0)
21+
ret float %ret
22+
}
23+
24+
define float @cvt_imm_1() {
25+
; CHECK-LABEL: define float @cvt_imm_1() {
26+
; CHECK-NEXT: ret float 6.250000e-02
27+
;
28+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 1)
29+
ret float %ret
30+
}
31+
32+
define float @cvt_imm_2() {
33+
; CHECK-LABEL: define float @cvt_imm_2() {
34+
; CHECK-NEXT: ret float 1.250000e-01
35+
;
36+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 2)
37+
ret float %ret
38+
}
39+
40+
define float @cvt_imm_3() {
41+
; CHECK-LABEL: define float @cvt_imm_3() {
42+
; CHECK-NEXT: ret float 1.875000e-01
43+
;
44+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 3)
45+
ret float %ret
46+
}
47+
48+
define float @cvt_imm_4() {
49+
; CHECK-LABEL: define float @cvt_imm_4() {
50+
; CHECK-NEXT: ret float 2.500000e-01
51+
;
52+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 4)
53+
ret float %ret
54+
}
55+
56+
define float @cvt_imm_5() {
57+
; CHECK-LABEL: define float @cvt_imm_5() {
58+
; CHECK-NEXT: ret float 3.125000e-01
59+
;
60+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 5)
61+
ret float %ret
62+
}
63+
64+
define float @cvt_imm_6() {
65+
; CHECK-LABEL: define float @cvt_imm_6() {
66+
; CHECK-NEXT: ret float 3.750000e-01
67+
;
68+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 6)
69+
ret float %ret
70+
}
71+
72+
define float @cvt_imm_7() {
73+
; CHECK-LABEL: define float @cvt_imm_7() {
74+
; CHECK-NEXT: ret float 4.375000e-01
75+
;
76+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 7)
77+
ret float %ret
78+
}
79+
80+
define float @cvt_imm_8() {
81+
; CHECK-LABEL: define float @cvt_imm_8() {
82+
; CHECK-NEXT: ret float -5.000000e-01
83+
;
84+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 8)
85+
ret float %ret
86+
}
87+
88+
define float @cvt_imm_9() {
89+
; CHECK-LABEL: define float @cvt_imm_9() {
90+
; CHECK-NEXT: ret float -4.375000e-01
91+
;
92+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 9)
93+
ret float %ret
94+
}
95+
96+
define float @cvt_imm_10() {
97+
; CHECK-LABEL: define float @cvt_imm_10() {
98+
; CHECK-NEXT: ret float -3.750000e-01
99+
;
100+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 10)
101+
ret float %ret
102+
}
103+
104+
define float @cvt_imm_11() {
105+
; CHECK-LABEL: define float @cvt_imm_11() {
106+
; CHECK-NEXT: ret float -3.125000e-01
107+
;
108+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 11)
109+
ret float %ret
110+
}
111+
112+
define float @cvt_imm_12() {
113+
; CHECK-LABEL: define float @cvt_imm_12() {
114+
; CHECK-NEXT: ret float -2.500000e-01
115+
;
116+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 12)
117+
ret float %ret
118+
}
119+
120+
define float @cvt_imm_13() {
121+
; CHECK-LABEL: define float @cvt_imm_13() {
122+
; CHECK-NEXT: ret float -1.875000e-01
123+
;
124+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 13)
125+
ret float %ret
126+
}
127+
128+
define float @cvt_imm_14() {
129+
; CHECK-LABEL: define float @cvt_imm_14() {
130+
; CHECK-NEXT: ret float -1.250000e-01
131+
;
132+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 14)
133+
ret float %ret
134+
}
135+
136+
define float @cvt_imm_15() {
137+
; CHECK-LABEL: define float @cvt_imm_15() {
138+
; CHECK-NEXT: ret float -6.250000e-02
139+
;
140+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 15)
141+
ret float %ret
142+
}
143+
144+
define float @cvt_imm_underflow() {
145+
; CHECK-LABEL: define float @cvt_imm_underflow() {
146+
; CHECK-NEXT: ret float -6.250000e-02
147+
;
148+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 -1)
149+
ret float %ret
150+
}
151+
152+
define float @cvt_imm_overflow() {
153+
; CHECK-LABEL: define float @cvt_imm_overflow() {
154+
; CHECK-NEXT: ret float 0.000000e+00
155+
;
156+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 16)
157+
ret float %ret
158+
}
159+
160+
define float @cvt_poison() {
161+
; CHECK-LABEL: define float @cvt_poison() {
162+
; CHECK-NEXT: ret float poison
163+
;
164+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 poison)
165+
ret float %ret
166+
}
167+
168+
define float @cvt_undef() {
169+
; CHECK-LABEL: define float @cvt_undef() {
170+
; CHECK-NEXT: ret float 0.000000e+00
171+
;
172+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 undef)
173+
ret float %ret
174+
}
175+
176+
define float @cvt_constexpr() {
177+
; CHECK-LABEL: define float @cvt_constexpr() {
178+
; CHECK-NEXT: [[RET:%.*]] = call float @llvm.amdgcn.cvt.off.f32.i4(i32 ptrtoint (ptr @gv to i32))
179+
; CHECK-NEXT: ret float [[RET]]
180+
;
181+
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 ptrtoint (ptr @gv to i32))
182+
ret float %ret
183+
}

0 commit comments

Comments
 (0)