Skip to content

AMDGPU: Add minimum3/maximum3 pkf16 for gfx950 encodings #117601

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,12 @@ def FeatureMinimum3Maximum3F16 : SubtargetFeature<"minimum3-maximum3-f16",
"Has v_minimum3_f16 and v_maximum3_f16 instructions"
>;

// Subtarget feature "minimum3-maximum3-pkf16": sets the
// HasMinimum3Maximum3PKF16 subtarget field to true. It marks targets that
// provide the packed-f16 three-operand v_pk_minimum3_f16 / v_pk_maximum3_f16
// instructions and is listed among the features of FeatureGFX950Insts.
def FeatureMinimum3Maximum3PKF16 : SubtargetFeature<"minimum3-maximum3-pkf16",
"HasMinimum3Maximum3PKF16",
"true",
"Has v_pk_minimum3_f16 and v_pk_maximum3_f16 instructions"
>;

def FeatureSupportsXNACK : SubtargetFeature<"xnack-support",
"SupportsXNACK",
"true",
Expand Down Expand Up @@ -432,7 +438,8 @@ def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
FeatureFP4ConversionScaleInsts,
FeatureFP6BF6ConversionScaleInsts,
FeatureF16BF16ToFP6BF6ConversionScaleInsts,
FeatureMinimum3Maximum3F32
FeatureMinimum3Maximum3F32,
FeatureMinimum3Maximum3PKF16
]
>;

Expand Down Expand Up @@ -2146,6 +2153,10 @@ def HasMinimum3Maximum3F16 :
Predicate<"Subtarget->hasMinimum3Maximum3F16()">,
AssemblerPredicate<(all_of FeatureMinimum3Maximum3F16)>;

// Predicate for the packed-f16 minimum3/maximum3 instructions: code
// generation queries Subtarget->hasMinimum3Maximum3PKF16(), while the
// assembler requires FeatureMinimum3Maximum3PKF16 to be enabled.
def HasMinimum3Maximum3PKF16 :
Predicate<"Subtarget->hasMinimum3Maximum3PKF16()">,
AssemblerPredicate<(all_of FeatureMinimum3Maximum3PKF16)>;


def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasAshrPkInsts = false;
bool HasMinimum3Maximum3F32 = false;
bool HasMinimum3Maximum3F16 = false;
bool HasMinimum3Maximum3PKF16 = false;

bool RequiresCOV6 = false;

Expand Down Expand Up @@ -1348,6 +1349,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return HasMinimum3Maximum3F16;
}

/// \returns true if the HasMinimum3Maximum3PKF16 flag is set for this
/// subtarget, i.e. the v_pk_minimum3_f16 and v_pk_maximum3_f16 instructions
/// are available.
bool hasMinimum3Maximum3PKF16() const {
return HasMinimum3Maximum3PKF16;
}

/// \returns The maximum number of instructions that can be enclosed in an
/// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
/// instruction.
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,11 @@ def : VOP3PSatPat<usubsat, V_PK_SUB_U16>;
def : VOP3PSatPat<ssubsat, V_PK_SUB_I16>;
} // End SubtargetPredicate = HasVOP3PInsts

// Packed f16 three-operand minimum/maximum. Both definitions are only
// available when HasMinimum3Maximum3PKF16 holds and are tagged with
// FPDPRounding = 1.
// NOTE(review): the profile name suggests a v2f16 result with three v2f16
// sources - confirm against VOP_V2F16_V2F16_V2F16_V2F16.
let SubtargetPredicate = HasMinimum3Maximum3PKF16, FPDPRounding = 1 in {
defm V_PK_MINIMUM3_F16 : VOP3PInst<"v_pk_minimum3_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>;
defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>;
} // End SubtargetPredicate = HasMinimum3Maximum3PKF16, FPDPRounding = 1

// TODO: Make sure we're doing the right thing with denormals. Note
// that FMA and MAD will differ.
multiclass MadFmaMixPats<SDPatternOperator fma_like,
Expand Down Expand Up @@ -2050,6 +2055,9 @@ defm V_PK_MUL_F16 : VOP3P_Real_vi <0x10>;
defm V_PK_MIN_F16 : VOP3P_Real_vi <0x11>;
defm V_PK_MAX_F16 : VOP3P_Real_vi <0x12>;

// Real encodings for the packed f16 minimum3/maximum3 instructions, using
// VOP3P opcodes 0x1b and 0x1c respectively.
defm V_PK_MINIMUM3_F16 : VOP3P_Real_vi <0x1b>;
defm V_PK_MAXIMUM3_F16 : VOP3P_Real_vi <0x1c>;

defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x20>;
defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x21>;
defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x22>;
Expand Down
97 changes: 97 additions & 0 deletions llvm/test/MC/AMDGPU/gfx950_asm_features.s
Original file line number Diff line number Diff line change
Expand Up @@ -1182,3 +1182,100 @@ v_maximum3_f32 v1, v2, s8, v3
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_minimum3_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04]
v_minimum3_f32 v0, v1, v2, v3


// v_pk_minimum3_f16 assembler tests: VGPR, inline-constant and SGPR sources,
// the clamp modifier, and op_sel / op_sel_hi / neg_lo / neg_hi modifiers.
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0x12,0x1c]
v_pk_minimum3_f16 v1, v2, v3, v4

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_minimum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0xd2,0x1b]
v_pk_minimum3_f16 v1, v2, v3, 2.0

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_minimum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0xe9,0x0d,0x1c]
v_pk_minimum3_f16 v1, v2, 2.0, v3

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_minimum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0xf4,0x04,0x0e,0x1c]
v_pk_minimum3_f16 v1, 2.0, v2, v3

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9b,0xd3,0x02,0x07,0x12,0x1c]
v_pk_minimum3_f16 v1, v2, v3, v4 clamp

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_minimum3_f16 v8, v0, s8, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x11,0x04,0x1c]
v_pk_minimum3_f16 v8, v0, s8, v1

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_minimum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18]
v_pk_minimum3_f16 v8, v0, v1, s8

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c]
v_pk_minimum3_f16 v8, v0, s0, v1 neg_lo:[0,0,0] neg_hi:[0,0,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c]
v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,0,0] neg_hi:[0,0,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c]
v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9b,0xd3,0x00,0x01,0x04,0x04]
v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9b,0xd3,0x00,0x01,0x04,0x04]
v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1]

// v_pk_maximum3_f16 assembler tests: same operand and modifier forms as the
// v_pk_minimum3_f16 cases, differing only in the opcode byte (0x9c vs 0x9b).
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0x12,0x1c]
v_pk_maximum3_f16 v1, v2, v3, v4

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_maximum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0xd2,0x1b]
v_pk_maximum3_f16 v1, v2, v3, 2.0

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_maximum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0xe9,0x0d,0x1c]
v_pk_maximum3_f16 v1, v2, 2.0, v3

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_maximum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0xf4,0x04,0x0e,0x1c]
v_pk_maximum3_f16 v1, 2.0, v2, v3

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9c,0xd3,0x02,0x07,0x12,0x1c]
v_pk_maximum3_f16 v1, v2, v3, v4 clamp

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_maximum3_f16 v8, v0, s8, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x11,0x04,0x1c]
v_pk_maximum3_f16 v8, v0, s8, v1

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_maximum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x03,0x22,0x18]
v_pk_maximum3_f16 v8, v0, v1, s8

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c]
v_pk_maximum3_f16 v8, v0, s0, v1 neg_lo:[0,0,0] neg_hi:[0,0,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c]
v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,0,0] neg_hi:[0,0,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c]
v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9c,0xd3,0x00,0x01,0x04,0x04]
v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04]
v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1]
6 changes: 6 additions & 0 deletions llvm/test/MC/AMDGPU/gfx950_err.s
Original file line number Diff line number Diff line change
Expand Up @@ -386,3 +386,9 @@ v_minimum3_f32 v0, s1, s2, v3

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported
v_minimum3_f32 v0, v1, v2, 0xdeadbeef

// Negative tests: packed minimum3/maximum3 with two different SGPR sources
// (s1 and s2) must be rejected with a constant bus restriction error.
// NOTE(review): presumably only one SGPR may be read over the constant bus
// here - confirm against the gfx950 operand rules.
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
v_pk_minimum3_f16 v0, s1, s2, v3

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
v_pk_maximum3_f16 v0, s1, s2, v3
61 changes: 61 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
Original file line number Diff line number Diff line change
Expand Up @@ -881,3 +881,64 @@

# GFX950: v_minimum3_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04]
0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04


# Disassembler tests for v_pk_maximum3_f16 and v_pk_minimum3_f16: each byte
# sequence below must decode to the instruction text in the preceding check line.
# GFX950: v_pk_maximum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0xf4,0x04,0x0e,0x1c]
0x01,0x40,0x9c,0xd3,0xf4,0x04,0x0e,0x1c

# GFX950: v_pk_maximum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0xe9,0x0d,0x1c]
0x01,0x40,0x9c,0xd3,0x02,0xe9,0x0d,0x1c

# GFX950: v_pk_maximum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0xd2,0x1b]
0x01,0x40,0x9c,0xd3,0x02,0x07,0xd2,0x1b

# GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0x12,0x1c]
0x01,0x40,0x9c,0xd3,0x02,0x07,0x12,0x1c

# GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9c,0xd3,0x02,0x07,0x12,0x1c]
0x01,0xc0,0x9c,0xd3,0x02,0x07,0x12,0x1c

# GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c]
0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c

# GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04]
0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04

# GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9c,0xd3,0x00,0x01,0x04,0x04]
0x08,0x00,0x9c,0xd3,0x00,0x01,0x04,0x04

# GFX950: v_pk_maximum3_f16 v8, v0, s8, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x11,0x04,0x1c]
0x08,0x40,0x9c,0xd3,0x00,0x11,0x04,0x1c

# GFX950: v_pk_maximum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x03,0x22,0x18]
0x08,0x40,0x9c,0xd3,0x00,0x03,0x22,0x18

# GFX950: v_pk_minimum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0xf4,0x04,0x0e,0x1c]
0x01,0x40,0x9b,0xd3,0xf4,0x04,0x0e,0x1c

# GFX950: v_pk_minimum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0xe9,0x0d,0x1c]
0x01,0x40,0x9b,0xd3,0x02,0xe9,0x0d,0x1c

# GFX950: v_pk_minimum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0xd2,0x1b]
0x01,0x40,0x9b,0xd3,0x02,0x07,0xd2,0x1b

# GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0x12,0x1c]
0x01,0x40,0x9b,0xd3,0x02,0x07,0x12,0x1c

# GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9b,0xd3,0x02,0x07,0x12,0x1c]
0x01,0xc0,0x9b,0xd3,0x02,0x07,0x12,0x1c

# GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c]
0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c

# GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9b,0xd3,0x00,0x01,0x04,0x04]
0x08,0x60,0x9b,0xd3,0x00,0x01,0x04,0x04

# GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9b,0xd3,0x00,0x01,0x04,0x04]
0x08,0x00,0x9b,0xd3,0x00,0x01,0x04,0x04

# GFX950: v_pk_minimum3_f16 v8, v0, s8, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x11,0x04,0x1c]
0x08,0x40,0x9b,0xd3,0x00,0x11,0x04,0x1c

# GFX950: v_pk_minimum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18]
0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18
Loading