llvm · arsenm · Nov 25, 2024 · Jan 26, 2024 · mikaelholmen · Dec 4, 2024
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -997,6 +997,12 @@ def FeatureVmemWriteVgprInOrder : SubtargetFeature<"vmem-write-vgpr-in-order",
   "VMEM instructions of the same type write VGPR results in order"
 >;
 
+def FeatureBitOp3Insts : SubtargetFeature<"bitop3-insts",
+  "HasBitOp3Insts",
+  "true",
+  "Has v_bitop3_b32/v_bitop3_b16 instructions"
+>;
+
 def FeaturePrngInst : SubtargetFeature<"prng-inst",
   "HasPrngInst",
   "true",
@@ -1524,7 +1530,8 @@ def FeatureISAVersion9_5_Common : FeatureSet<
    FeatureCvtFP8VOP1Bug,
    FeatureGFX950Insts,
    FeaturePrngInst,
-   FeatureBF16ConversionInsts
+   FeatureBF16ConversionInsts,
+   FeatureBitOp3Insts
    ])>;
 
 def FeatureISAVersion9_4_0 : FeatureSet<
@@ -2392,6 +2399,9 @@ def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">,
 def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">,
   AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>;
 
+def HasBitOp3Insts : Predicate<"Subtarget->hasBitOp3Insts()">,
+  AssemblerPredicate<(all_of FeatureBitOp3Insts)>;
+
 def HasPrngInst : Predicate<"Subtarget->hasPrngInst()">,
   AssemblerPredicate<(all_of FeaturePrngInst)>;
 

diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -171,6 +171,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     ImmTyWaitVAVDst,
     ImmTyWaitVMVSrc,
     ImmTyByteSel,
+    ImmTyBitOp3,
   };
 
   // Immediate operand kind.
@@ -410,6 +411,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
+  bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
 
   bool isRegOrImm() const {
     return isReg() || isImm();
@@ -1138,6 +1140,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
     case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
     case ImmTyByteSel: OS << "ByteSel" ; break;
+    case ImmTyBitOp3: OS << "BitOp3"; break;
     }
     // clang-format on
   }
@@ -1913,6 +1916,9 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
   ParseStatus parseEndpgm(OperandVector &Operands);
 
   ParseStatus parseVOPD(OperandVector &Operands);
+
+  ParseStatus parseBitOp3(OperandVector &Operands);
+  AMDGPUOperand::Ptr defaultBitOp3() const;
 };
 
 } // end anonymous namespace
@@ -8841,6 +8847,11 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
     Inst.addOperand(Inst.getOperand(0));
   }
 
+  int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
+  if (BitOp3Idx != -1) {
+    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
+  }
+
   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
   // instruction, and then figure out where to actually put the modifiers
 
@@ -9748,6 +9759,20 @@ ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
 
 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
 
+//===----------------------------------------------------------------------===//
+// BITOP3
+//===----------------------------------------------------------------------===//
+
+ParseStatus AMDGPUAsmParser::parseBitOp3(OperandVector &Operands) {
+  ParseStatus Res =
+      parseIntWithPrefix("bitop3", Operands, AMDGPUOperand::ImmTyBitOp3);
+  return Res;
+}
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBitOp3() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBitOp3);
+}
+
 //===----------------------------------------------------------------------===//
 // Split Barrier
 //===----------------------------------------------------------------------===//

diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -220,6 +220,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool HasSALUFloatInsts = false;
   bool HasPseudoScalarTrans = false;
   bool HasRestrictedSOffset = false;
+  bool HasBitOp3Insts = false;
   bool HasPrngInst = false;
   bool HasPermlane16Swap = false;
   bool HasPermlane32Swap = false;
@@ -1321,6 +1322,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   /// \returns true if the target has instructions with xf32 format support.
   bool hasXF32Insts() const { return HasXF32Insts; }
 
+  bool hasBitOp3Insts() const { return HasBitOp3Insts; }
+
   bool hasPermlane16Swap() const { return HasPermlane16Swap; }
   bool hasPermlane32Swap() const { return HasPermlane32Swap; }
 

diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -1714,4 +1714,18 @@ void AMDGPUInstPrinter::printNamedInt(const MCInst *MI, unsigned OpNo,
     O << ' ' << Prefix << ':' << (PrintInHex ? formatHex(V) : formatDec(V));
 }
 
+void AMDGPUInstPrinter::printBitOp3(const MCInst *MI, unsigned OpNo,
+                                    const MCSubtargetInfo &STI,
+                                    raw_ostream &O) {
+  uint8_t Imm = MI->getOperand(OpNo).getImm();
+  if (!Imm)
+    return;
+
+  O << " bitop3:";
+  if (Imm <= 10)
+    O << formatDec(Imm);
+  else
+    O << formatHex(static_cast<uint64_t>(Imm));
+}
+
 #include "AMDGPUGenAsmWriter.inc"
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -163,6 +163,9 @@ class AMDGPUInstPrinter : public MCInstPrinter {
                      const MCSubtargetInfo &STI, raw_ostream &O,
                      StringRef Prefix, bool PrintInHex, bool AlwaysPrint);
 
+  void printBitOp3(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                   raw_ostream &O);
+
 public:
   static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,
                          StringRef Asm, StringRef Default = "");

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1271,6 +1271,9 @@ def ByteSel : NamedIntOperand<"byte_sel"> {
   let Validator = "isUInt<2>";
 }
 
+def BitOp3 : CustomOperand<i8, 1, "BitOp3">;
+def bitop3_0 : DefaultOperand<BitOp3, 0>;
+
 class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
   let OperandNamespace = "AMDGPU";
   let OperandType = "OPERAND_KIMM"#vt.Size;

diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -856,6 +856,24 @@ class PermlaneVarPat<SDPatternOperator permlane,
         VGPR_32:$src1, VGPR_32:$vdst_in)
 >;
 
+class VOP3_BITOP3_Profile<VOPProfile pfl, VOP3Features f> : VOP3_Profile<pfl, f> {
+  let HasClamp = 0;
+  let HasOMod = 0;
+  let HasModifiers = 0;
+
+  let Ins64 = !con(getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
+                            0 /* HasIntClamp */, HasModifiers, HasSrc2Mods,
+                            HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret,
+                   (ins bitop3_0:$bitop3));
+
+  let InsVOP3OpSel = !con(getInsVOP3Base<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, 0, 1, 1, 0,
+                                         Src0Mod, Src1Mod, Src2Mod, 0>.ret,
+                          (ins bitop3_0:$bitop3, op_sel0:$op_sel));
+
+  let Asm64 = "$vdst, $src0, $src1, $src2$bitop3";
+  let AsmVOP3OpSel = !subst("$op_sel", "$bitop3$op_sel", getAsmVOP3OpSel<3, 0, 0, 0, 0, 0>.ret);
+}
+
 let SubtargetPredicate = isGFX10Plus in {
   let isCommutable = 1, isReMaterializable = 1 in {
     defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -908,6 +926,16 @@ let SubtargetPredicate = isGFX12Plus in {
 
 } // End SubtargetPredicate = isGFX12Plus
 
+let SubtargetPredicate = HasBitOp3Insts  in {
+  let isReMaterializable = 1 in {
+    defm V_BITOP3_B16 : VOP3Inst <"v_bitop3_b16",
+                                  VOP3_BITOP3_Profile<VOPProfile_True16<VOPProfile <[i16, i16, i16, i16, i8]>>,
+                                                      VOP3_OPSEL>>;
+    defm V_BITOP3_B32 : VOP3Inst <"v_bitop3_b32",
+                                  VOP3_BITOP3_Profile<VOPProfile <[i32, i32, i32, i32, i8]>, VOP3_REGULAR>>;
+  }
+} // End SubtargetPredicate = HasBitOp3Insts
+
 class DivFmasPat<ValueType vt, Instruction inst, Register CondReg> : GCNPat<
   (AMDGPUdiv_fmas (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
                   (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)),
@@ -1606,6 +1634,23 @@ multiclass VOP3_Real_gfx9<bits<10> op, string AsmName> {
             }
 }
 
+multiclass VOP3_Real_BITOP3_gfx9<bits<10> op, string AsmName, bit isSingle = 0> {
+  defvar ps = !cast<VOP_Pseudo>(NAME#"_e64");
+  let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
+    def _gfx9 : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
+                VOP3e_vi <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> {
+      let AsmString = AsmName # ps.AsmOperands;
+      bits<8> bitop3;
+      let Inst{60-59} = bitop3{7-6};
+      let Inst{10-8}  = bitop3{5-3};
+      let Inst{63-61} = bitop3{2-0};
+      let Inst{11} = !if(ps.Pfl.HasOpSel, src0_modifiers{2}, 0);
+      let Inst{12} = !if(ps.Pfl.HasOpSel, src1_modifiers{2}, 0);
+      let Inst{13} = !if(ps.Pfl.HasOpSel, src2_modifiers{2}, 0);
+      let Inst{14} = !if(ps.Pfl.HasOpSel, src0_modifiers{3}, 0);
+    }
+  }
+}
 } // End AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9"
 
 defm V_MAD_U64_U32      : VOP3be_Real_vi <0x1E8>;
@@ -1748,3 +1793,6 @@ defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>;
 defm V_CVT_PK_BF16_F32: VOP3OpSel_Real_gfx9 <0x268>;
 defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>;
 defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;
+
+defm V_BITOP3_B16         : VOP3_Real_BITOP3_gfx9<0x233, "v_bitop3_b16">;
+defm V_BITOP3_B32         : VOP3_Real_BITOP3_gfx9<0x234, "v_bitop3_b32">;
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx950_asm_vop3.s
@@ -1,26 +1,76 @@
-// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck -check-prefix=GFX940-ERR --strict-whitespace  %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX906-ERR %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx940 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX940-ERR %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding < %s | FileCheck --check-prefix=GFX950 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX12-ERR %s
 
 v_cvt_pk_bf16_f32 v5, v1, v2
+// GFX906-ERR: error: instruction not supported on this GPU
+// GFX940-ERR: error: instruction not supported on this GPU
 // GFX950: v_cvt_pk_bf16_f32 v5, v1, v2            ; encoding: [0x05,0x00,0x68,0xd2,0x01,0x05,0x02,0x00]
-// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: error: instruction not supported on this GPU
 
 v_cvt_pk_bf16_f32 v5, v255, v255
+// GFX906-ERR: error: instruction not supported on this GPU
+// GFX940-ERR: error: instruction not supported on this GPU
 // GFX950: v_cvt_pk_bf16_f32 v5, v255, v255        ; encoding: [0x05,0x00,0x68,0xd2,0xff,0xff,0x03,0x00]
-// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: error: instruction not supported on this GPU
 
 v_cvt_pk_bf16_f32 v5, v1, s2
+// GFX906-ERR: error: instruction not supported on this GPU
+// GFX940-ERR: error: instruction not supported on this GPU
 // GFX950: v_cvt_pk_bf16_f32 v5, v1, s2           ; encoding: [0x05,0x00,0x68,0xd2,0x01,0x05,0x00,0x00]
-// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: error: instruction not supported on this GPU
 
 v_cvt_pk_bf16_f32 v5, m0, 0.5
+// GFX906-ERR: error: instruction not supported on this GPU
+// GFX940-ERR: error: instruction not supported on this GPU
 // GFX950: v_cvt_pk_bf16_f32 v5, m0, 0.5           ; encoding: [0x05,0x00,0x68,0xd2,0x7c,0xe0,0x01,0x00]
-// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: error: instruction not supported on this GPU
 
 v_cvt_pk_bf16_f32 v5, -1, exec_hi
+// GFX906-ERR: error: instruction not supported on this GPU
+// GFX940-ERR: error: instruction not supported on this GPU
 // GFX950: v_cvt_pk_bf16_f32 v5, -1, exec_hi       ; encoding: [0x05,0x00,0x68,0xd2,0xc1,0xfe,0x00,0x00]
-// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: error: instruction not supported on this GPU
 
 v_cvt_pk_bf16_f32 v5, 0.5, m0 mul:2
+// GFX906-ERR: error: instruction not supported on this GPU
+// GFX940-ERR: error: instruction not supported on this GPU
 // GFX950: v_cvt_pk_bf16_f32 v5, 0.5, m0 mul:2     ; encoding: [0x05,0x00,0x68,0xd2,0xf0,0xf8,0x00,0x08]
-// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: error: instruction not supported on this GPU
+
+v_bitop3_b32 v5, v1, v2, s3
+// GFX906-ERR: error: instruction not supported on this GPU
+// GFX940-ERR: error: instruction not supported on this GPU
+// GFX950: v_bitop3_b32 v5, v1, v2, s3             ; encoding: [0x05,0x00,0x34,0xd2,0x01,0x05,0x0e,0x00]
+// GFX12-ERR: error: instruction not supported on this GPU
+
+v_bitop3_b32 v5, v1, v2, s3 bitop3:161
+// GFX906-ERR: error: instruction not supported on this GPU
+// GFX940-ERR: error: instruction not supported on this GPU
+// GFX950: v_bitop3_b32 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x34,0xd2,0x01,0x05,0x0e,0x30]
+// GFX12-ERR: error: instruction not supported on this GPU
+
+v_bitop3_b32 v5, m0, 0.5, m0 bitop3:5
+// GFX906-ERR: error: instruction not supported on this GPU
+// GFX940-ERR: error: instruction not supported on this GPU
+// GFX950: v_bitop3_b32 v5, m0, 0.5, m0 bitop3:5   ; encoding: [0x05,0x00,0x34,0xd2,0x7c,0xe0,0xf1,0xa1]
+// GFX12-ERR: error: instruction not supported on this GPU
+
+v_bitop3_b32 v5, 0.5, m0, 0.5 bitop3:101
+// GFX906-ERR: error: instruction not supported on this GPU
+// GFX940-ERR: error: instruction not supported on this GPU
+// GFX950: v_bitop3_b32 v5, 0.5, m0, 0.5 bitop3:0x65 ; encoding: [0x05,0x04,0x34,0xd2,0xf0,0xf8,0xc0,0xab]
+// GFX12-ERR: error: instruction not supported on this GPU
+
+v_bitop3_b16 v5, v1, v2, s3
+// GFX906-ERR: error: instruction not supported on this GPU
+// GFX940-ERR: error: instruction not supported on this GPU
+// GFX950: v_bitop3_b16 v5, v1, v2, s3             ; encoding: [0x05,0x00,0x33,0xd2,0x01,0x05,0x0e,0x00]
+// GFX12-ERR: error: instruction not supported on this GPU
+
+v_bitop3_b16 v5, v1, v2, s3 bitop3:161
+// GFX906-ERR: error: instruction not supported on this GPU
+// GFX940-ERR: error: instruction not supported on this GPU
+// GFX950: v_bitop3_b16 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x33,0xd2,0x01,0x05,0x0e,0x30]
+// GFX12-ERR: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
@@ -17,3 +17,21 @@
 
 # GFX950: v_cvt_pk_bf16_f32 v5, 0.5, m0 mul:2     ; encoding: [0x05,0x00,0x68,0xd2,0xf0,0xf8,0x00,0x08]
 0x05,0x00,0x68,0xd2,0xf0,0xf8,0x00,0x08
+
+# GFX950: v_bitop3_b32 v5, v1, v2, s3               ; encoding: [0x05,0x00,0x34,0xd2,0x01,0x05,0x0e,0x00]
+0x05,0x00,0x34,0xd2,0x01,0x05,0x0e,0x00
+
+# GFX950: v_bitop3_b32 v5, v1, v2, s3 bitop3:0xa1   ; encoding: [0x05,0x04,0x34,0xd2,0x01,0x05,0x0e,0x30]
+0x05,0x04,0x34,0xd2,0x01,0x05,0x0e,0x30
+
+# GFX950: v_bitop3_b32 v5, m0, 0.5, m0 bitop3:5     ; encoding: [0x05,0x00,0x34,0xd2,0x7c,0xe0,0xf1,0xa1]
+0x05,0x00,0x34,0xd2,0x7c,0xe0,0xf1,0xa1
+
+# GFX950: v_bitop3_b32 v5, 0.5, m0, 0.5 bitop3:0x65 ; encoding: [0x05,0x04,0x34,0xd2,0xf0,0xf8,0xc0,0xab]
+0x05,0x04,0x34,0xd2,0xf0,0xf8,0xc0,0xab
+
+# GFX950: v_bitop3_b16 v5, v1, v2, s3               ; encoding: [0x05,0x00,0x33,0xd2,0x01,0x05,0x0e,0x00]
+0x05,0x00,0x33,0xd2,0x01,0x05,0x0e,0x00
+
+# GFX950: v_bitop3_b16 v5, v1, v2, s3 bitop3:0xa1   ; encoding: [0x05,0x04,0x33,0xd2,0x01,0x05,0x0e,0x30]
+0x05,0x04,0x33,0xd2,0x01,0x05,0x0e,0x30