Skip to content

Commit c8f0d27

Browse files
committed
[AMDGPU] Fix the gfx10 scheduling model for f32 conversions
Summary:
As far as I can tell, on gfx10 the conversions to/from f32 (other than those converting between f32 and f64) are full-rate instructions, but they were marked as quarter-rate instructions. I have fixed this for gfx10 only. I assume the scheduling model was correct for older architectures, though I don't have any documentation handy to confirm that.

Reviewers: rampitec, arsenm

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D75392
1 parent 47ec870 commit c8f0d27

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed

llvm/lib/Target/AMDGPU/SISchedule.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ def MIMFMARead : SchedRead;
2929

3030
// Vector ALU instructions
3131
def Write32Bit : SchedWrite;
32+
def WriteFloatCvt : SchedWrite;
3233
def WriteQuarterRate32 : SchedWrite;
3334

3435
def WriteFloatFMA : SchedWrite;
@@ -126,6 +127,7 @@ multiclass SICommonWriteRes {
126127

127128
def : HWVALUWriteRes<Write32Bit, 1>;
128129
def : HWVALUWriteRes<Write64Bit, 2>;
130+
def : HWVALUWriteRes<WriteFloatCvt, 4>;
129131
def : HWVALUWriteRes<WriteQuarterRate32, 4>;
130132
def : HWVALUWriteRes<Write2PassMAI, 2>;
131133
def : HWVALUWriteRes<Write8PassMAI, 8>;
@@ -185,6 +187,7 @@ let SchedModel = GFX10SpeedModel in {
185187
// The latency values are 1 / (operations / cycle).
186188
// Add 1 stall cycle for VGPR read.
187189
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
190+
def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>;
188191
def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 9>;
189192
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 17>;
190193
def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
190190
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
191191
} // End SchedRW = [WriteDoubleCvt]
192192

193-
let SchedRW = [WriteQuarterRate32] in {
193+
let SchedRW = [WriteFloatCvt] in {
194194
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
195195
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
196196
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
@@ -202,7 +202,7 @@ defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
202202
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
203203
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
204204
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
205-
} // End SchedRW = [WriteQuarterRate32]
205+
} // End SchedRW = [WriteFloatCvt]
206206

207207
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
208208
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;

0 commit comments

Comments
 (0)