-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[RISCV] Implement forward inserting save/restore FRM instructions. #77744
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
61af743
bdfe1a8
68ddd7b
da0489e
6bceb53
5af6ebc
28cf429
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,11 @@ using namespace llvm; | |
#define DEBUG_TYPE "riscv-insert-read-write-csr" | ||
#define RISCV_INSERT_READ_WRITE_CSR_NAME "RISC-V Insert Read/Write CSR Pass" | ||
|
||
static cl::opt<bool> | ||
DisableFRMInsertOpt("riscv-disable-frm-insert-opt", cl::init(false), | ||
cl::Hidden, | ||
cl::desc("Disable optimized frm insertion.")); | ||
|
||
namespace { | ||
|
||
class RISCVInsertReadWriteCSR : public MachineFunctionPass { | ||
|
@@ -46,6 +51,7 @@ class RISCVInsertReadWriteCSR : public MachineFunctionPass { | |
|
||
private: | ||
bool emitWriteRoundingMode(MachineBasicBlock &MBB); | ||
bool emitWriteRoundingModeOpt(MachineBasicBlock &MBB); | ||
}; | ||
|
||
} // end anonymous namespace | ||
|
@@ -55,6 +61,107 @@ char RISCVInsertReadWriteCSR::ID = 0; | |
INITIALIZE_PASS(RISCVInsertReadWriteCSR, DEBUG_TYPE, | ||
RISCV_INSERT_READ_WRITE_CSR_NAME, false, false) | ||
|
||
// TODO: Use more accurate rounding mode at the start of MBB. | ||
bool RISCVInsertReadWriteCSR::emitWriteRoundingModeOpt(MachineBasicBlock &MBB) { | ||
bool Changed = false; | ||
MachineInstr *LastFRMChanger = nullptr; | ||
std::optional<unsigned> CurrentRM = RISCVFPRndMode::DYN; | ||
std::optional<Register> SavedFRM; | ||
|
||
for (MachineInstr &MI : MBB) { | ||
if (MI.getOpcode() == RISCV::SwapFRMImm || | ||
MI.getOpcode() == RISCV::WriteFRMImm) { | ||
CurrentRM = MI.getOperand(0).getImm(); | ||
SavedFRM = std::nullopt; | ||
continue; | ||
} | ||
|
||
if (MI.getOpcode() == RISCV::WriteFRM) { | ||
CurrentRM = RISCVFPRndMode::DYN; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure here, but I think you may be confusing CurrentRM being DYN and CurrentRM being unknown. I don't know if it matters, but I think, if I'm reading this right, that DYN is supposed to be a "don't care" value, not an "unknown" value. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. My idea is to use DYN as the right rounding mode when executing MI. And I found that I don't need |
||
SavedFRM = std::nullopt; | ||
continue; | ||
} | ||
|
||
if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::FRM)) { | ||
// Restore FRM before unknown operations. | ||
if (SavedFRM.has_value()) | ||
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteFRM)) | ||
.addReg(*SavedFRM); | ||
CurrentRM = RISCVFPRndMode::DYN; | ||
SavedFRM = std::nullopt; | ||
continue; | ||
} | ||
|
||
assert(!MI.modifiesRegister(RISCV::FRM) && | ||
"Expected that MI could not modify FRM."); | ||
|
||
auto getInstructionRM = [](MachineInstr &MI) -> std::optional<unsigned> { | ||
int FRMIdx = RISCVII::getFRMOpNum(MI.getDesc()); | ||
if (FRMIdx >= 0) | ||
return MI.getOperand(FRMIdx).getImm(); | ||
|
||
if (!MI.hasRegisterImplicitUseOperand(RISCV::FRM)) | ||
return std::nullopt; | ||
|
||
return RISCVFPRndMode::DYN; | ||
}; | ||
|
||
std::optional<unsigned> InstrRM = getInstructionRM(MI); | ||
|
||
// Skip if MI does not need FRM. | ||
if (!InstrRM.has_value()) | ||
continue; | ||
|
||
if (InstrRM != RISCVFPRndMode::DYN) | ||
LastFRMChanger = &MI; | ||
|
||
if (!MI.readsRegister(RISCV::FRM)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe this if is redundant as you'd have early-continued above. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*IsDef*/ false, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Must set Changed here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
/*IsImp*/ true)); | ||
|
||
// Skip if MI uses same rounding mode as FRM. | ||
if (InstrRM == CurrentRM) | ||
continue; | ||
|
||
if (InstrRM == RISCVFPRndMode::DYN) { | ||
if (!SavedFRM.has_value()) | ||
continue; | ||
// SavedFRM not having a value means current FRM has correct rounding | ||
// mode. | ||
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteFRM)) | ||
.addReg(*SavedFRM); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Must set Changed here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we don't need to set Changed here, since Changed was set when There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I moved the setting of Changed to right after adding the implicit use of FRM. |
||
SavedFRM = std::nullopt; | ||
CurrentRM = RISCVFPRndMode::DYN; | ||
continue; | ||
} | ||
|
||
if (CurrentRM == RISCVFPRndMode::DYN) { | ||
// Save current FRM value to SavedFRM. | ||
MachineRegisterInfo *MRI = &MBB.getParent()->getRegInfo(); | ||
SavedFRM = MRI->createVirtualRegister(&RISCV::GPRRegClass); | ||
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::SwapFRMImm), *SavedFRM) | ||
.addImm(*InstrRM); | ||
} else { | ||
// Don't need to save current FRM when CurrentRM != DYN. | ||
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteFRMImm)) | ||
.addImm(*InstrRM); | ||
} | ||
CurrentRM = InstrRM; | ||
Changed = true; | ||
} | ||
|
||
// Restore FRM if needed. | ||
if (SavedFRM.has_value()) { | ||
assert(LastFRMChanger && "Expected valid pointer."); | ||
MachineInstrBuilder MIB = | ||
BuildMI(*MBB.getParent(), {}, TII->get(RISCV::WriteFRM)) | ||
.addReg(*SavedFRM); | ||
MBB.insertAfter(LastFRMChanger, MIB); | ||
} | ||
|
||
return Changed; | ||
} | ||
|
||
// This function also swaps frm and restores it when encountering an RVV | ||
// floating point instruction with a static rounding mode. | ||
bool RISCVInsertReadWriteCSR::emitWriteRoundingMode(MachineBasicBlock &MBB) { | ||
|
@@ -99,8 +206,12 @@ bool RISCVInsertReadWriteCSR::runOnMachineFunction(MachineFunction &MF) { | |
|
||
bool Changed = false; | ||
|
||
for (MachineBasicBlock &MBB : MF) | ||
Changed |= emitWriteRoundingMode(MBB); | ||
for (MachineBasicBlock &MBB : MF) { | ||
if (DisableFRMInsertOpt) | ||
Changed |= emitWriteRoundingMode(MBB); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No test coverage for the unoptimized version. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do you think it is a good idea to remove the unoptimized part? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. BTW, I have added unoptimized test cases in llvm/test/CodeGen/RISCV/rvv/frm-insert.ll |
||
else | ||
Changed |= emitWriteRoundingModeOpt(MBB); | ||
} | ||
|
||
return Changed; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,217 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 | ||
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -target-abi=lp64d < %s | FileCheck %s | ||
|
||
declare <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32( | ||
<vscale x 1 x float>, | ||
<vscale x 1 x float>, | ||
<vscale x 1 x float>, | ||
i64, i64) | ||
|
||
; Test only save/restore frm once. | ||
define <vscale x 1 x float> @test(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind { | ||
; CHECK-LABEL: test: | ||
; CHECK: # %bb.0: # %entry | ||
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma | ||
; CHECK-NEXT: fsrmi a0, 0 | ||
; CHECK-NEXT: vfadd.vv v8, v8, v9 | ||
; CHECK-NEXT: vfadd.vv v8, v8, v8 | ||
; CHECK-NEXT: fsrm a0 | ||
; CHECK-NEXT: ret | ||
entry: | ||
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32( | ||
<vscale x 1 x float> undef, | ||
<vscale x 1 x float> %0, | ||
<vscale x 1 x float> %1, | ||
i64 0, i64 %2) | ||
%b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32( | ||
<vscale x 1 x float> undef, | ||
<vscale x 1 x float> %a, | ||
<vscale x 1 x float> %a, | ||
i64 0, i64 %2) | ||
ret <vscale x 1 x float> %b | ||
} | ||
|
||
; Test only restore frm once. | ||
define <vscale x 1 x float> @test2(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind { | ||
; CHECK-LABEL: test2: | ||
; CHECK: # %bb.0: # %entry | ||
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma | ||
; CHECK-NEXT: fsrmi a0, 0 | ||
; CHECK-NEXT: vfadd.vv v8, v8, v9 | ||
; CHECK-NEXT: fsrmi 1 | ||
; CHECK-NEXT: vfadd.vv v8, v8, v8 | ||
; CHECK-NEXT: fsrm a0 | ||
; CHECK-NEXT: ret | ||
entry: | ||
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32( | ||
<vscale x 1 x float> undef, | ||
<vscale x 1 x float> %0, | ||
<vscale x 1 x float> %1, | ||
i64 0, i64 %2) | ||
%b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32( | ||
<vscale x 1 x float> undef, | ||
<vscale x 1 x float> %a, | ||
<vscale x 1 x float> %a, | ||
i64 1, i64 %2) | ||
ret <vscale x 1 x float> %b | ||
} | ||
|
||
; Test restoring frm before function call and doing nothing with following | ||
; dynamic rounding mode operations. | ||
declare void @foo() | ||
define <vscale x 1 x float> @test3(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add test cases for the following sub-cases?
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
; CHECK-LABEL: test3: | ||
; CHECK: # %bb.0: # %entry | ||
; CHECK-NEXT: addi sp, sp, -32 | ||
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill | ||
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill | ||
; CHECK-NEXT: csrr a1, vlenb | ||
; CHECK-NEXT: slli a1, a1, 1 | ||
; CHECK-NEXT: sub sp, sp, a1 | ||
; CHECK-NEXT: mv s0, a0 | ||
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma | ||
; CHECK-NEXT: fsrmi a0, 0 | ||
; CHECK-NEXT: vfadd.vv v8, v8, v9 | ||
; CHECK-NEXT: addi a1, sp, 16 | ||
; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill | ||
; CHECK-NEXT: fsrm a0 | ||
; CHECK-NEXT: call foo | ||
; CHECK-NEXT: vsetvli zero, s0, e32, mf2, ta, ma | ||
; CHECK-NEXT: addi a0, sp, 16 | ||
; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload | ||
; CHECK-NEXT: vfadd.vv v8, v8, v8 | ||
; CHECK-NEXT: csrr a0, vlenb | ||
; CHECK-NEXT: slli a0, a0, 1 | ||
; CHECK-NEXT: add sp, sp, a0 | ||
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload | ||
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload | ||
; CHECK-NEXT: addi sp, sp, 32 | ||
; CHECK-NEXT: ret | ||
entry: | ||
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32( | ||
<vscale x 1 x float> undef, | ||
<vscale x 1 x float> %0, | ||
<vscale x 1 x float> %1, | ||
i64 0, i64 %2) | ||
call void @foo() | ||
%b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32( | ||
<vscale x 1 x float> undef, | ||
<vscale x 1 x float> %a, | ||
<vscale x 1 x float> %a, | ||
i64 7, i64 %2) | ||
ret <vscale x 1 x float> %b | ||
} | ||
|
||
; Test restoring frm before inline asm and doing nothing with following dynamic | ||
; rounding mode operations. | ||
define <vscale x 1 x float> @test4(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind { | ||
; CHECK-LABEL: test4: | ||
; CHECK: # %bb.0: # %entry | ||
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma | ||
; CHECK-NEXT: fsrmi a1, 0 | ||
; CHECK-NEXT: vfadd.vv v8, v8, v9 | ||
; CHECK-NEXT: fsrm a1 | ||
; CHECK-NEXT: #APP | ||
; CHECK-NEXT: #NO_APP | ||
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma | ||
; CHECK-NEXT: vfadd.vv v8, v8, v8 | ||
; CHECK-NEXT: ret | ||
entry: | ||
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32( | ||
<vscale x 1 x float> undef, | ||
<vscale x 1 x float> %0, | ||
<vscale x 1 x float> %1, | ||
i64 0, i64 %2) | ||
call void asm sideeffect "", ""() | ||
%b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32( | ||
<vscale x 1 x float> undef, | ||
<vscale x 1 x float> %a, | ||
<vscale x 1 x float> %a, | ||
i64 7, i64 %2) | ||
ret <vscale x 1 x float> %b | ||
} | ||
|
||
; Test restoring frm before reading frm and doing nothing with following | ||
; dynamic rounding mode operations. | ||
declare i32 @llvm.get.rounding() | ||
define <vscale x 1 x float> @test5(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2, ptr %p) nounwind { | ||
; CHECK-LABEL: test5: | ||
; CHECK: # %bb.0: # %entry | ||
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma | ||
; CHECK-NEXT: fsrmi a0, 0 | ||
; CHECK-NEXT: vfadd.vv v8, v8, v9 | ||
; CHECK-NEXT: fsrm a0 | ||
; CHECK-NEXT: frrm a0 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should be able to elide the read here. Not sure if worth implementing, just noting opportunity. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I have added a TODO for it. Is that enough? |
||
; CHECK-NEXT: slli a0, a0, 2 | ||
; CHECK-NEXT: lui a2, 66 | ||
; CHECK-NEXT: addiw a2, a2, 769 | ||
; CHECK-NEXT: srl a0, a2, a0 | ||
; CHECK-NEXT: andi a0, a0, 7 | ||
; CHECK-NEXT: vfadd.vv v8, v8, v8 | ||
; CHECK-NEXT: sw a0, 0(a1) | ||
; CHECK-NEXT: ret | ||
entry: | ||
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32( | ||
<vscale x 1 x float> undef, | ||
<vscale x 1 x float> %0, | ||
<vscale x 1 x float> %1, | ||
i64 0, i64 %2) | ||
%rm = call i32 @llvm.get.rounding() | ||
store i32 %rm, ptr %p, align 4 | ||
%b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32( | ||
<vscale x 1 x float> undef, | ||
<vscale x 1 x float> %a, | ||
<vscale x 1 x float> %a, | ||
i64 7, i64 %2) | ||
ret <vscale x 1 x float> %b | ||
} | ||
|
||
; Test not set FRM for the two vfadd after WriteFRMImm. | ||
declare void @llvm.set.rounding(i32) | ||
define <vscale x 1 x float> @test6(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind { | ||
; CHECK-LABEL: test6: | ||
; CHECK: # %bb.0: # %entry | ||
; CHECK-NEXT: fsrmi 4 | ||
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma | ||
; CHECK-NEXT: vfadd.vv v8, v8, v9 | ||
; CHECK-NEXT: vfadd.vv v8, v8, v8 | ||
; CHECK-NEXT: ret | ||
entry: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please split this into two tests: one for following known same, one for following DYN. Also please add coverage for the case where the immediate next is known different constant. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
call void @llvm.set.rounding(i32 4) | ||
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32( | ||
<vscale x 1 x float> undef, | ||
<vscale x 1 x float> %0, | ||
<vscale x 1 x float> %1, | ||
i64 4, i64 %2) | ||
%b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32( | ||
<vscale x 1 x float> undef, | ||
<vscale x 1 x float> %a, | ||
<vscale x 1 x float> %a, | ||
i64 7, i64 %2) | ||
ret <vscale x 1 x float> %b | ||
} | ||
|
||
; Test not set FRM for the vfadd after WriteFRM. | ||
define <vscale x 1 x float> @test7(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i32 %rm, i64 %2) nounwind { | ||
; CHECK-LABEL: test7: | ||
; CHECK: # %bb.0: # %entry | ||
; CHECK-NEXT: slli a0, a0, 32 | ||
; CHECK-NEXT: srli a0, a0, 30 | ||
; CHECK-NEXT: lui a2, 66 | ||
; CHECK-NEXT: addiw a2, a2, 769 | ||
; CHECK-NEXT: srl a0, a2, a0 | ||
; CHECK-NEXT: andi a0, a0, 7 | ||
; CHECK-NEXT: fsrm a0 | ||
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma | ||
; CHECK-NEXT: vfadd.vv v8, v8, v9 | ||
; CHECK-NEXT: ret | ||
entry: | ||
call void @llvm.set.rounding(i32 %rm) | ||
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32( | ||
<vscale x 1 x float> undef, | ||
<vscale x 1 x float> %0, | ||
<vscale x 1 x float> %1, | ||
i64 7, i64 %2) | ||
ret <vscale x 1 x float> %a | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think we need std::optional. We can use
SavedFRM.isValid()
to track validity. And we can use SavedFRM = Register()
to make it invalid when we need to. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. Thank you for your suggestion.