Skip to content

AMDGPU: MC support for V_CVT_SCALE_SR_FP4 instructions #117795

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 27, 2024

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented Nov 26, 2024

Co-authored-by: Shilei Tian [email protected]

This was referenced Nov 26, 2024
@arsenm arsenm marked this pull request as ready for review November 26, 2024 21:34
@llvmbot llvmbot added the mc Machine (object) code label Nov 26, 2024
@llvmbot
Copy link
Member

llvmbot commented Nov 26, 2024

@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Co-authored-by: Shilei Tian <[email protected]>


Full diff: https://github.com/llvm/llvm-project/pull/117795.diff

5 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+4)
  • (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+2)
  • (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+31-1)
  • (modified) llvm/test/MC/AMDGPU/gfx950_asm_features.s (+96)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt (+72)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index f63bc0ea4e7b0c..3ff588c4d0a452 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -336,6 +336,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
   }
 
+  bool isPackedFP32InputMods() const {
+    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::v2f32);
+  }
+
   bool isVReg() const {
     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
            isRegClass(AMDGPU::VReg_64RegClassID) ||
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 32ebcd06c5e53c..d08bda37292105 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1535,6 +1535,7 @@ class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
 
 def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
 def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;
+def PackedF32InputModsMatchClass : PackedFPInputModsMatchClass<32>;
 
 class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
   let PrintMethod = "printOperandAndFPInputMods";
@@ -1546,6 +1547,7 @@ class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <
 
 def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
 def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;
+def PackedF32InputMods : PackedFPInputMods<PackedF32InputModsMatchClass>;
 
 def MFMALdScaleModifierOp : TImmLeaf<i32, [{
   return isUInt<2>(Imm);
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 42475b18afecdc..237bb0faffff8b 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -983,6 +983,29 @@ class VOP3_CVT_SCALE_FP4_F16BF16_TiedInput_Profile<VOPProfile P> : VOP3_Profile<
   let HasExtVOP3DPP = 0;
 }
 
+class VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<ValueType Src0Ty> :
+    VOP3_Profile<VOPProfile<[i32, Src0Ty, i32, f32]>, VOP3_OPSEL> {
+  let InsVOP3OpSel = (ins PackedF16InputMods: $src0_modifiers, Src0RC64:$src0,
+                          Int32InputMods:     $src1_modifiers, Src1RC64:$src1,
+                          FP32InputMods:      $src2_modifiers, Src2RC64:$src2,
+                          VGPR_32:$vdst_in,   op_sel0:$op_sel);
+  let HasClamp = 0;
+  let HasExtVOP3DPP = 0;
+  let HasOpSel = 1;
+  let HasOMod = 0;
+}
+
+def VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile : VOP3_Profile<VOPProfile<[i32, v2f32, i32, f32]>, VOP3_OPSEL> {
+  let InsVOP3OpSel = (ins PackedF32InputMods: $src0_modifiers, Src0RC64:$src0,
+                          Int32InputMods:     $src1_modifiers, Src1RC64:$src1,
+                          FP32InputMods:      $src2_modifiers, Src2RC64:$src2,
+                          VGPR_32:$vdst_in,   op_sel0:$op_sel);
+  let HasClamp = 0;
+  let HasExtVOP3DPP = 0;
+  let HasOpSel = 1;
+  let HasOMod = 0;
+}
+
 class VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPProfile<[DstTy, i32, f32, untyped]>,
                                               VOP3_OPSEL> {
   let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
@@ -1049,8 +1072,12 @@ let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in
 
 let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in {
   defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f32>>;
-  let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in
+  let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
     defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<VOP_I32_F32_F32_F32>>;
+    defm V_CVT_SCALEF32_SR_PK_FP4_F16:  VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2f16>>;
+    defm V_CVT_SCALEF32_SR_PK_FP4_BF16: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_bf16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2bf16>>;
+    defm V_CVT_SCALEF32_SR_PK_FP4_F32:  VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f32", VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile>;
+  }
   defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f16>>;
   defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_bf16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2bf16>>;
 
@@ -2117,6 +2144,9 @@ defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3OpSel_Real_gfx9 <0x250>;
 defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3OpSel_Real_gfx9 <0x251>;
 defm V_CVT_SCALEF32_PK_FP4_F16 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x24c>;
 defm V_CVT_SCALEF32_PK_FP4_BF16: VOP3OpSel_Real_gfx9_forced_opsel2 <0x24d>;
+defm V_CVT_SCALEF32_SR_PK_FP4_F16:  VOP3OpSel_Real_gfx9 <0x24e>;
+defm V_CVT_SCALEF32_SR_PK_FP4_BF16: VOP3OpSel_Real_gfx9 <0x24f>;
+defm V_CVT_SCALEF32_SR_PK_FP4_F32:  VOP3OpSel_Real_gfx9 <0x23e>;
 }
 let OtherPredicates = [HasFP6BF6ConversionScaleInsts] in {
 defm V_CVT_SCALEF32_PK32_F32_FP6 : VOP3_Real_gfx9<0x256, "v_cvt_scalef32_pk32_f32_fp6">;
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index 75022d8cf0cdd0..1a776c1050578c 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -1279,3 +1279,99 @@ v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0]
 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
 // GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04]
 v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 ; encoding: [0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 ; encoding: [0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 ; encoding: [0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x4f,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,1,1]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x4e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,1,1]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x3e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,1,1]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x4f,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,0,1]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x4e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,0,1]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x3e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,0,1]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x4f,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,1,0]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x4e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,1,0]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x3e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,1,0]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, -v2, v4, v5 ; encoding: [0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x24]
+v_cvt_scalef32_sr_pk_fp4_bf16 v0, -v2, v4, v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, -v5 ; encoding: [0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x84]
+v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, -v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, |v5| ; encoding: [0x00,0x04,0x4f,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, |v5|
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, |v2|, v4, v5 ; encoding: [0x00,0x01,0x4f,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_bf16 v0, |v2|, v4, v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, -v2, v4, v5 ; encoding: [0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x24]
+v_cvt_scalef32_sr_pk_fp4_f16 v0, -v2, v4, v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, -v5 ; encoding: [0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x84]
+v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, -v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, |v2|, v4, v5 ; encoding: [0x00,0x01,0x4e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f16 v0, |v2|, v4, v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, |v5| ; encoding: [0x00,0x04,0x4e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, |v5|
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, -v[2:3], v4, v5 ; encoding: [0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x24]
+v_cvt_scalef32_sr_pk_fp4_f32 v0, -v[2:3], v4, v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, -v5 ; encoding: [0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x84]
+v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, -v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, |v[2:3]|, v4, v5 ; encoding: [0x00,0x01,0x3e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f32 v0, |v[2:3]|, v4, v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, |v5| ; encoding: [0x00,0x04,0x3e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, |v5|
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
index adb4f78942503e..2b5b4c7770924d 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
@@ -942,3 +942,75 @@
 
 # GFX950: v_pk_minimum3_f16 v8, v0, v1, s8        ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18]
 0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 ; encoding: [0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 ; encoding: [0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 ; encoding: [0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x4f,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x60,0x4f,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x4e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x60,0x4e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x3e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x60,0x3e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x4f,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x40,0x4f,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x4e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x40,0x4e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x3e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x40,0x3e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x4f,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x20,0x4f,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x4e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x20,0x4e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x3e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x20,0x3e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, -v2, v4, v5 ; encoding: [0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x24]
+0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x24
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, -v5 ; encoding: [0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x84]
+0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x84
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, |v5| ; encoding: [0x00,0x04,0x4f,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x04,0x4f,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, |v2|, v4, v5 ; encoding: [0x00,0x01,0x4f,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x01,0x4f,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, -v2, v4, v5 ; encoding: [0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x24]
+0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x24
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, -v5 ; encoding: [0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x84]
+0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x84
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, |v2|, v4, v5 ; encoding: [0x00,0x01,0x4e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x01,0x4e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, |v5| ; encoding: [0x00,0x04,0x4e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x04,0x4e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, -v[2:3], v4, v5 ; encoding: [0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x24]
+0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x24
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, -v5 ; encoding: [0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x84]
+0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x84
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, |v[2:3]|, v4, v5 ; encoding: [0x00,0x01,0x3e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x01,0x3e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, |v5| ; encoding: [0x00,0x04,0x3e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x04,0x3e,0xd2,0x02,0x09,0x16,0x04

Copy link
Contributor Author

arsenm commented Nov 27, 2024

Merge activity

  • Nov 26, 7:08 PM EST: A user started a stack merge that includes this pull request via Graphite.
  • Nov 26, 7:39 PM EST: Graphite rebased this pull request as part of a merge.
  • Nov 26, 7:41 PM EST: A user merged this pull request with Graphite.

@arsenm arsenm force-pushed the users/arsenm/gfx950/codegen-v_cvt_scalef32_pk_fp4_f16_bf16 branch from 9ff806f to efa35c5 Compare November 27, 2024 00:35
Base automatically changed from users/arsenm/gfx950/codegen-v_cvt_scalef32_pk_fp4_f16_bf16 to main November 27, 2024 00:38
@arsenm arsenm force-pushed the users/arsenm/gfx950/mc-v_cvt_scale_sr_fp4 branch from 50432fb to 244a560 Compare November 27, 2024 00:38
@arsenm arsenm merged commit d3c103b into main Nov 27, 2024
5 of 7 checks passed
@arsenm arsenm deleted the users/arsenm/gfx950/mc-v_cvt_scale_sr_fp4 branch November 27, 2024 00:41
searlmc1 pushed a commit to ROCm/llvm-project that referenced this pull request Feb 3, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants