Skip to content

[RISCV] Add searchable table for tune information #66193

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions llvm/lib/Target/RISCV/RISCVFeatures.td
Original file line number Diff line number Diff line change
Expand Up @@ -950,12 +950,3 @@ def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",
"AllowTaggedGlobals",
"true", "Use an instruction sequence for taking the address of a global "
"that allows a memory tag in the upper address bits">;

foreach align = [2, 4, 8, 16, 32, 64] in {
def TunePrefFunctionAlignment # align :
SubtargetFeature<"pref-func-align-" # align, "PrefFunctionAlignment",
"Align(" # align # ")", "Set preferred function alignment to " # align # " bytes">;
def TunePrefLoopAlignment # align :
SubtargetFeature<"pref-loop-align-" # align, "PrefLoopAlignment",
"Align(" # align # ")", "Set preferred loop alignment to " # align # " bytes">;
}
26 changes: 23 additions & 3 deletions llvm/lib/Target/RISCV/RISCVProcessors.td
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,24 @@
// RISC-V processors supported.
//===----------------------------------------------------------------------===//

class RISCVTuneInfo {
bits<8> PrefFunctionAlignment = 1;
bits<8> PrefLoopAlignment = 1;
}

def RISCVTuneInfoTable : GenericTable {
let FilterClass = "RISCVTuneInfo";
let CppTypeName = "RISCVTuneInfo";
let Fields = ["Name", "PrefFunctionAlignment", "PrefLoopAlignment"];
}

def getRISCVTuneInfo : SearchIndex {
let Table = RISCVTuneInfoTable;
let Key = ["Name"];
}

class GenericTuneInfo: RISCVTuneInfo;

class RISCVProcessorModel<string n,
SchedMachineModel m,
list<SubtargetFeature> f,
Expand All @@ -27,13 +45,15 @@ class RISCVTuneProcessorModel<string n,

def GENERIC_RV32 : RISCVProcessorModel<"generic-rv32",
NoSchedModel,
[Feature32Bit]>;
[Feature32Bit]>,
GenericTuneInfo;
def GENERIC_RV64 : RISCVProcessorModel<"generic-rv64",
NoSchedModel,
[Feature64Bit]>;
[Feature64Bit]>,
GenericTuneInfo;
// Support generic for compatibility with other targets. The triple will be used
// to change to the appropriate rv32/rv64 version.
def : ProcessorModel<"generic", NoSchedModel, []>;
def : ProcessorModel<"generic", NoSchedModel, []>, GenericTuneInfo;

def ROCKET_RV32 : RISCVProcessorModel<"rocket-rv32",
RocketModel,
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/RISCV/RISCVSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "RISCVGenSubtargetInfo.inc"

namespace llvm::RISCVTuneInfoTable {

#define GET_RISCVTuneInfoTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace llvm::RISCVTuneInfoTable

static cl::opt<bool> EnableSubRegLiveness("riscv-enable-subreg-liveness",
cl::init(true), cl::Hidden);

Expand Down Expand Up @@ -65,6 +71,12 @@ RISCVSubtarget::initializeSubtargetDependencies(const Triple &TT, StringRef CPU,
if (TuneCPU.empty())
TuneCPU = CPU;

TuneInfo = RISCVTuneInfoTable::getRISCVTuneInfo(TuneCPU);
// If there is no TuneInfo for this CPU, we fail back to generic.
if (!TuneInfo)
TuneInfo = RISCVTuneInfoTable::getRISCVTuneInfo("generic");
assert(TuneInfo && "TuneInfo shouldn't be nullptr!");

ParseSubtargetFeatures(CPU, TuneCPU, FS);
TargetABI = RISCVABI::computeTargetABI(TT, getFeatureBits(), ABIName);
RISCVFeatures::validate(TT, getFeatureBits());
Expand Down
23 changes: 19 additions & 4 deletions llvm/lib/Target/RISCV/RISCVSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,18 @@
namespace llvm {
class StringRef;

namespace RISCVTuneInfoTable {

struct RISCVTuneInfo {
const char *Name;
uint8_t PrefFunctionAlignment;
uint8_t PrefLoopAlignment;
};

#define GET_RISCVTuneInfoTable_DECL
#include "RISCVGenSearchableTables.inc"
} // namespace RISCVTuneInfoTable

class RISCVSubtarget : public RISCVGenSubtargetInfo {
public:
enum RISCVProcFamilyEnum : uint8_t {
Expand All @@ -54,8 +66,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
uint8_t MaxInterleaveFactor = 2;
RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
std::bitset<RISCV::NUM_TARGET_REGS> UserReservedRegister;
Align PrefFunctionAlignment;
Align PrefLoopAlignment;
const RISCVTuneInfoTable::RISCVTuneInfo *TuneInfo;

RISCVFrameLowering FrameLowering;
RISCVInstrInfo InstrInfo;
Expand Down Expand Up @@ -96,8 +107,12 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
}
bool enableMachineScheduler() const override { return true; }

Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
Align getPrefFunctionAlignment() const {
return Align(TuneInfo->PrefFunctionAlignment);
}
Align getPrefLoopAlignment() const {
return Align(TuneInfo->PrefLoopAlignment);
}

/// Returns RISC-V processor family.
/// Avoid this function! CPU specifics should be kept local to this class
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/CodeGen/RISCV/align-loops.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
; RUN: llc < %s -mtriple=riscv64 | FileCheck %s
; RUN: llc < %s -mtriple=riscv64 -align-loops=16 | FileCheck %s -check-prefix=ALIGN_16
; RUN: llc < %s -mtriple=riscv64 -align-loops=32 | FileCheck %s -check-prefix=ALIGN_32
; RUN: llc < %s -mtriple=riscv64 -mattr=+pref-loop-align-16 | FileCheck %s -check-prefix=ALIGN_16
; RUN: llc < %s -mtriple=riscv64 -mattr=+pref-loop-align-32 | FileCheck %s -check-prefix=ALIGN_32

declare void @foo()

Expand Down
4 changes: 0 additions & 4 deletions llvm/test/CodeGen/RISCV/align.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
; RUN: | FileCheck %s -check-prefix=RV32I
; RUN: llc -mtriple=riscv32 -mattr=+c -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32C
; RUN: llc -mtriple=riscv32 -mattr=+pref-func-align-32 -verify-machineinstrs < %s \
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there any processors that have non-default alignment? We could pass -mcpu and check for the .p2align N line so that we have test coverage.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know one. Maybe you can provide more information about SiFive processors in upstream?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We haven't noticed any performance issues from instruction alignment on SiFive7 and are relying on linker to get this right. I'm not so sure what else we can do to have test coverage for this without an upstream processor that uses this TuneInfo 🤷

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've just learned that different alignment values have different results on different workloads for us. Therefore, I think we should have way to control this value at compile time.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are IR attributes that override the CPU defaults. falign-loops and falign-functions uses those attributes.

Copy link
Contributor Author

@wangpc-pp wangpc-pp Sep 20, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found some descriptions in https://starfivetech.com/uploads/u74mc_core_complex_manual_21G1.pdf:

3.2.6 Instruction Fetch Unit
The S7 instruction fetch unit is responsible for keeping the pipeline fed with instructions from
memory. The instruction fetch unit delivers up to 8 bytes of instructions per clock cycle to sup-
port superscalar instruction execution. Fetches are always word-aligned and there is a one-
cycle penalty for branching to a 32-bit instruction that is not word-aligned.

It seems we'd better to align functions and hot loops to 4-byte to get best performance. But C extension is supported so the alignment would be 2-byte.

  // Function alignments.
  const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());

I don't know much about S7 series, so I think you SiFive guys may do the change of alignments in upstream?

; RUN: | FileCheck %s -check-prefix=ALIGN-32
; RUN: llc -filetype=obj -mtriple=riscv32 < %s -o %t
; RUN: llvm-readelf -S %t | FileCheck %s --check-prefixes=SEC,SEC-I
; RUN: llc -filetype=obj -mtriple=riscv32 -mattr=+c < %s -o %t
Expand All @@ -18,8 +16,6 @@ define void @foo() {
;RV32I: foo:
;RV32C: .p2align 1
;RV32C: foo:
;ALIGN-32: .p2align 5
;ALIGN-32: foo:
entry:
ret void
}