Skip to content

AMDGPU: Add MC support for gfx950 V_BITOP3_B32/B16 #117379

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -997,6 +997,12 @@ def FeatureVmemWriteVgprInOrder : SubtargetFeature<"vmem-write-vgpr-in-order",
"VMEM instructions of the same type write VGPR results in order"
>;

// Subtarget feature "bitop3-insts": sets the HasBitOp3Insts subtarget
// attribute to "true" to advertise the v_bitop3_b32/v_bitop3_b16 instructions.
def FeatureBitOp3Insts : SubtargetFeature<"bitop3-insts",
"HasBitOp3Insts",
"true",
"Has v_bitop3_b32/v_bitop3_b16 instructions"
>;

def FeaturePrngInst : SubtargetFeature<"prng-inst",
"HasPrngInst",
"true",
Expand Down Expand Up @@ -1524,7 +1530,8 @@ def FeatureISAVersion9_5_Common : FeatureSet<
FeatureCvtFP8VOP1Bug,
FeatureGFX950Insts,
FeaturePrngInst,
FeatureBF16ConversionInsts
FeatureBF16ConversionInsts,
FeatureBitOp3Insts
])>;

def FeatureISAVersion9_4_0 : FeatureSet<
Expand Down Expand Up @@ -2392,6 +2399,9 @@ def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">,
// Codegen predicate: Subtarget->hasPseudoScalarTrans().
// Assembler predicate: requires FeaturePseudoScalarTrans.
def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">,
AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>;

// Codegen predicate: Subtarget->hasBitOp3Insts().
// Assembler predicate: requires FeatureBitOp3Insts.
def HasBitOp3Insts : Predicate<"Subtarget->hasBitOp3Insts()">,
AssemblerPredicate<(all_of FeatureBitOp3Insts)>;

// Codegen predicate: Subtarget->hasPrngInst().
// Assembler predicate: requires FeaturePrngInst.
def HasPrngInst : Predicate<"Subtarget->hasPrngInst()">,
AssemblerPredicate<(all_of FeaturePrngInst)>;

Expand Down
25 changes: 25 additions & 0 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
ImmTyWaitVAVDst,
ImmTyWaitVMVSrc,
ImmTyByteSel,
ImmTyBitOp3,
};

// Immediate operand kind.
Expand Down Expand Up @@ -410,6 +411,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
// True when this operand satisfies isImmTy(ImmTyOpSelHi).
bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
// True when this operand satisfies isImmTy(ImmTyNegLo).
bool isNegLo() const { return isImmTy(ImmTyNegLo); }
// True when this operand satisfies isImmTy(ImmTyNegHi).
bool isNegHi() const { return isImmTy(ImmTyNegHi); }
// True when this operand is an ImmTyBitOp3 immediate whose value fits in an
// unsigned 8-bit field. The type check runs first, so getImm() is only read
// for operands of the right kind.
bool isBitOp3() const {
  if (!isImmTy(ImmTyBitOp3))
    return false;
  return isUInt<8>(getImm());
}

bool isRegOrImm() const {
return isReg() || isImm();
Expand Down Expand Up @@ -1138,6 +1140,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
case ImmTyByteSel: OS << "ByteSel" ; break;
case ImmTyBitOp3: OS << "BitOp3"; break;
}
// clang-format on
}
Expand Down Expand Up @@ -1913,6 +1916,9 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
ParseStatus parseEndpgm(OperandVector &Operands);

ParseStatus parseVOPD(OperandVector &Operands);

ParseStatus parseBitOp3(OperandVector &Operands);
AMDGPUOperand::Ptr defaultBitOp3() const;
};

} // end anonymous namespace
Expand Down Expand Up @@ -8841,6 +8847,11 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
Inst.addOperand(Inst.getOperand(0));
}

int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
if (BitOp3Idx != -1) {
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
}

// FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
// instruction, and then figure out where to actually put the modifiers

Expand Down Expand Up @@ -9748,6 +9759,20 @@ ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {

// True when this operand satisfies isImmTy(ImmTyEndpgm).
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

//===----------------------------------------------------------------------===//
// BITOP3
//===----------------------------------------------------------------------===//

// Parse the "bitop3" modifier by delegating to parseIntWithPrefix with the
// "bitop3" prefix and the ImmTyBitOp3 operand type; its status is returned
// unchanged.
ParseStatus AMDGPUAsmParser::parseBitOp3(OperandVector &Operands) {
  return parseIntWithPrefix("bitop3", Operands, AMDGPUOperand::ImmTyBitOp3);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBitOp3() const {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi,

This method seems to be unused on main. Will it be used in some coming patch or should it be removed?
gcc warns like

../lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp:9809: warning: '{anonymous}::AMDGPUOperand::Ptr {anonymous}::AMDGPUAsmParser::defaultBitOp3() const' defined but not used [-Wunused-function]
 9809 | AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBitOp3() const {
      | 

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBitOp3);
}

//===----------------------------------------------------------------------===//
// Split Barrier
//===----------------------------------------------------------------------===//
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasSALUFloatInsts = false;
bool HasPseudoScalarTrans = false;
bool HasRestrictedSOffset = false;
bool HasBitOp3Insts = false;
bool HasPrngInst = false;
bool HasPermlane16Swap = false;
bool HasPermlane32Swap = false;
Expand Down Expand Up @@ -1321,6 +1322,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// \returns true if the target has instructions with xf32 format support.
bool hasXF32Insts() const { return HasXF32Insts; }

/// \returns true if the target has the v_bitop3_b32/v_bitop3_b16 instructions.
bool hasBitOp3Insts() const { return HasBitOp3Insts; }

/// \returns the value of the HasPermlane16Swap subtarget flag.
bool hasPermlane16Swap() const { return HasPermlane16Swap; }
/// \returns the value of the HasPermlane32Swap subtarget flag.
bool hasPermlane32Swap() const { return HasPermlane32Swap; }

Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1714,4 +1714,18 @@ void AMDGPUInstPrinter::printNamedInt(const MCInst *MI, unsigned OpNo,
O << ' ' << Prefix << ':' << (PrintInHex ? formatHex(V) : formatDec(V));
}

// Print the bitop3 modifier of operand OpNo as " bitop3:<value>".
// The operand is truncated to 8 bits. A zero value prints nothing, values up
// to and including 10 are printed in decimal, and anything larger is printed
// in hex. STI is not used.
void AMDGPUInstPrinter::printBitOp3(const MCInst *MI, unsigned OpNo,
                                    const MCSubtargetInfo &STI,
                                    raw_ostream &O) {
  const uint8_t Value = MI->getOperand(OpNo).getImm();
  if (Value == 0)
    return;

  O << " bitop3:";
  if (Value > 10) {
    O << formatHex(static_cast<uint64_t>(Value));
    return;
  }
  O << formatDec(Value);
}

#include "AMDGPUGenAsmWriter.inc"
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,9 @@ class AMDGPUInstPrinter : public MCInstPrinter {
const MCSubtargetInfo &STI, raw_ostream &O,
StringRef Prefix, bool PrintInHex, bool AlwaysPrint);

void printBitOp3(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);

public:
static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,
StringRef Asm, StringRef Default = "");
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1271,6 +1271,9 @@ def ByteSel : NamedIntOperand<"byte_sel"> {
let Validator = "isUInt<2>";
}

// i8 operand carrying the bitop3 immediate. The "BitOp3" name presumably ties
// it to the parseBitOp3/isBitOp3/printBitOp3 hooks - confirm against the
// CustomOperand definition.
def BitOp3 : CustomOperand<i8, 1, "BitOp3">;
// BitOp3 operand wrapped in DefaultOperand with the value 0.
def bitop3_0 : DefaultOperand<BitOp3, 0>;

class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_KIMM"#vt.Size;
Expand Down
48 changes: 48 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,24 @@ class PermlaneVarPat<SDPatternOperator permlane,
VGPR_32:$src1, VGPR_32:$vdst_in)
>;

// VOP3 profile used by the v_bitop3 instructions: a VOP3_Profile with clamp,
// omod and source modifiers disabled, and an extra bitop3_0:$bitop3 operand
// appended to the input operand lists and asm strings.
class VOP3_BITOP3_Profile<VOPProfile pfl, VOP3Features f> : VOP3_Profile<pfl, f> {
let HasClamp = 0;
let HasOMod = 0;
let HasModifiers = 0;

// Regular VOP3 form: the generated 64-bit inputs followed by $bitop3.
let Ins64 = !con(getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
0 /* HasIntClamp */, HasModifiers, HasSrc2Mods,
HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret,
(ins bitop3_0:$bitop3));

// op_sel form: $bitop3 is placed ahead of $op_sel in the operand list.
let InsVOP3OpSel = !con(getInsVOP3Base<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, 0, 1, 1, 0,
Src0Mod, Src1Mod, Src2Mod, 0>.ret,
(ins bitop3_0:$bitop3, op_sel0:$op_sel));

// $bitop3 follows $src2 with no separator in the asm string.
let Asm64 = "$vdst, $src0, $src1, $src2$bitop3";
// Reuse the generic op_sel asm string, inserting $bitop3 right before $op_sel.
let AsmVOP3OpSel = !subst("$op_sel", "$bitop3$op_sel", getAsmVOP3OpSel<3, 0, 0, 0, 0, 0>.ret);
}

let SubtargetPredicate = isGFX10Plus in {
let isCommutable = 1, isReMaterializable = 1 in {
defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
Expand Down Expand Up @@ -908,6 +926,16 @@ let SubtargetPredicate = isGFX12Plus in {

} // End SubtargetPredicate = isGFX12Plus

// v_bitop3 pseudo instructions, gated on the HasBitOp3Insts predicate and
// marked rematerializable. The b16 variant uses a True16 profile with
// VOP3_OPSEL; the b32 variant uses VOP3_REGULAR. Both profiles list an i8 type
// after the result and the three source types.
let SubtargetPredicate = HasBitOp3Insts in {
let isReMaterializable = 1 in {
defm V_BITOP3_B16 : VOP3Inst <"v_bitop3_b16",
VOP3_BITOP3_Profile<VOPProfile_True16<VOPProfile <[i16, i16, i16, i16, i8]>>,
VOP3_OPSEL>>;
defm V_BITOP3_B32 : VOP3Inst <"v_bitop3_b32",
VOP3_BITOP3_Profile<VOPProfile <[i32, i32, i32, i32, i8]>, VOP3_REGULAR>>;
}
} // End SubtargetPredicate = HasBitOp3Insts

class DivFmasPat<ValueType vt, Instruction inst, Register CondReg> : GCNPat<
(AMDGPUdiv_fmas (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
(vt (VOP3Mods vt:$src1, i32:$src1_modifiers)),
Expand Down Expand Up @@ -1606,6 +1634,23 @@ multiclass VOP3_Real_gfx9<bits<10> op, string AsmName> {
}
}

// Real GFX9-family encoding for a bitop3 VOP3 pseudo named NAME#"_e64".
//   op       - 10-bit opcode passed to VOP3e_vi.
//   AsmName  - mnemonic; the asm string is AsmName followed by the pseudo's
//              AsmOperands.
//   isSingle - OR-ed with the pseudo profile's IsSingle to set IsSingle.
multiclass VOP3_Real_BITOP3_gfx9<bits<10> op, string AsmName, bit isSingle = 0> {
defvar ps = !cast<VOP_Pseudo>(NAME#"_e64");
let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
def _gfx9 : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
VOP3e_vi <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> {
let AsmString = AsmName # ps.AsmOperands;
// The 8-bit bitop3 immediate is scattered over three non-contiguous fields:
//   bitop3[7:6] -> Inst[60:59], bitop3[5:3] -> Inst[10:8],
//   bitop3[2:0] -> Inst[63:61].
bits<8> bitop3;
let Inst{60-59} = bitop3{7-6};
let Inst{10-8} = bitop3{5-3};
let Inst{63-61} = bitop3{2-0};
// Inst[14:11] take bit 2 of each source modifier and bit 3 of
// src0_modifiers when the profile has HasOpSel; otherwise they are zero.
let Inst{11} = !if(ps.Pfl.HasOpSel, src0_modifiers{2}, 0);
let Inst{12} = !if(ps.Pfl.HasOpSel, src1_modifiers{2}, 0);
let Inst{13} = !if(ps.Pfl.HasOpSel, src2_modifiers{2}, 0);
let Inst{14} = !if(ps.Pfl.HasOpSel, src0_modifiers{3}, 0);
}
}
}
} // End AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9"

defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>;
Expand Down Expand Up @@ -1748,3 +1793,6 @@ defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>;
defm V_CVT_PK_BF16_F32: VOP3OpSel_Real_gfx9 <0x268>;
defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>;
defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;

// Real encodings for the bitop3 instructions: opcode 0x233 for the b16 form
// and 0x234 for the b32 form.
defm V_BITOP3_B16 : VOP3_Real_BITOP3_gfx9<0x233, "v_bitop3_b16">;
defm V_BITOP3_B32 : VOP3_Real_BITOP3_gfx9<0x234, "v_bitop3_b32">;
66 changes: 58 additions & 8 deletions llvm/test/MC/AMDGPU/gfx950_asm_vop3.s
Original file line number Diff line number Diff line change
@@ -1,26 +1,76 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck -check-prefix=GFX940-ERR --strict-whitespace %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX906-ERR %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx940 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX940-ERR %s
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding < %s | FileCheck --check-prefix=GFX950 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX12-ERR %s

v_cvt_pk_bf16_f32 v5, v1, v2
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x68,0xd2,0x01,0x05,0x02,0x00]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// GFX12-ERR: error: instruction not supported on this GPU

v_cvt_pk_bf16_f32 v5, v255, v255
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_cvt_pk_bf16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x68,0xd2,0xff,0xff,0x03,0x00]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// GFX12-ERR: error: instruction not supported on this GPU

v_cvt_pk_bf16_f32 v5, v1, s2
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_cvt_pk_bf16_f32 v5, v1, s2 ; encoding: [0x05,0x00,0x68,0xd2,0x01,0x05,0x00,0x00]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// GFX12-ERR: error: instruction not supported on this GPU

v_cvt_pk_bf16_f32 v5, m0, 0.5
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_cvt_pk_bf16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x68,0xd2,0x7c,0xe0,0x01,0x00]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// GFX12-ERR: error: instruction not supported on this GPU

v_cvt_pk_bf16_f32 v5, -1, exec_hi
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_cvt_pk_bf16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x68,0xd2,0xc1,0xfe,0x00,0x00]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// GFX12-ERR: error: instruction not supported on this GPU

v_cvt_pk_bf16_f32 v5, 0.5, m0 mul:2
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_cvt_pk_bf16_f32 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x68,0xd2,0xf0,0xf8,0x00,0x08]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// GFX12-ERR: error: instruction not supported on this GPU

v_bitop3_b32 v5, v1, v2, s3
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_bitop3_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x34,0xd2,0x01,0x05,0x0e,0x00]
// GFX12-ERR: error: instruction not supported on this GPU

v_bitop3_b32 v5, v1, v2, s3 bitop3:161
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_bitop3_b32 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x34,0xd2,0x01,0x05,0x0e,0x30]
// GFX12-ERR: error: instruction not supported on this GPU

v_bitop3_b32 v5, m0, 0.5, m0 bitop3:5
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_bitop3_b32 v5, m0, 0.5, m0 bitop3:5 ; encoding: [0x05,0x00,0x34,0xd2,0x7c,0xe0,0xf1,0xa1]
// GFX12-ERR: error: instruction not supported on this GPU

v_bitop3_b32 v5, 0.5, m0, 0.5 bitop3:101
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_bitop3_b32 v5, 0.5, m0, 0.5 bitop3:0x65 ; encoding: [0x05,0x04,0x34,0xd2,0xf0,0xf8,0xc0,0xab]
// GFX12-ERR: error: instruction not supported on this GPU

v_bitop3_b16 v5, v1, v2, s3
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_bitop3_b16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x33,0xd2,0x01,0x05,0x0e,0x00]
// GFX12-ERR: error: instruction not supported on this GPU

v_bitop3_b16 v5, v1, v2, s3 bitop3:161
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_bitop3_b16 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x33,0xd2,0x01,0x05,0x0e,0x30]
// GFX12-ERR: error: instruction not supported on this GPU
18 changes: 18 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,21 @@

# GFX950: v_cvt_pk_bf16_f32 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x68,0xd2,0xf0,0xf8,0x00,0x08]
0x05,0x00,0x68,0xd2,0xf0,0xf8,0x00,0x08

# GFX950: v_bitop3_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x34,0xd2,0x01,0x05,0x0e,0x00]
0x05,0x00,0x34,0xd2,0x01,0x05,0x0e,0x00

# GFX950: v_bitop3_b32 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x34,0xd2,0x01,0x05,0x0e,0x30]
0x05,0x04,0x34,0xd2,0x01,0x05,0x0e,0x30

# GFX950: v_bitop3_b32 v5, m0, 0.5, m0 bitop3:5 ; encoding: [0x05,0x00,0x34,0xd2,0x7c,0xe0,0xf1,0xa1]
0x05,0x00,0x34,0xd2,0x7c,0xe0,0xf1,0xa1

# GFX950: v_bitop3_b32 v5, 0.5, m0, 0.5 bitop3:0x65 ; encoding: [0x05,0x04,0x34,0xd2,0xf0,0xf8,0xc0,0xab]
0x05,0x04,0x34,0xd2,0xf0,0xf8,0xc0,0xab

# GFX950: v_bitop3_b16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x33,0xd2,0x01,0x05,0x0e,0x00]
0x05,0x00,0x33,0xd2,0x01,0x05,0x0e,0x00

# GFX950: v_bitop3_b16 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x33,0xd2,0x01,0x05,0x0e,0x30]
0x05,0x04,0x33,0xd2,0x01,0x05,0x0e,0x30
Loading