Skip to content

Commit e0526b0

Browse files
authored
[AMDGPU][True16][MC] true16 for v_minmax/maxmin_f16 (llvm#119586)
Support the true16 format for v_minmax/maxmin_f16 in MC. Because `v_minmax/maxmin_f16` is replaced by `v_minmax/maxmin_f16_t16` and `v_minmax/maxmin_f16_fake16` on GFX11 and later, the CodeGen patterns for `v_minmax/maxmin_f16` also have to be updated so that the CodeGen tests keep passing.
1 parent e10b12e commit e0526b0

9 files changed

+865
-256
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3708,12 +3708,15 @@ def : IntMinMaxPat<V_MAXMIN_U32_e64, umin, umax_oneuse>;
37083708
def : IntMinMaxPat<V_MINMAX_U32_e64, umax, umin_oneuse>;
37093709
def : FPMinMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>;
37103710
def : FPMinMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>;
3711-
def : FPMinMaxPat<V_MINMAX_F16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
3712-
def : FPMinMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
37133711
def : FPMinCanonMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>;
37143712
def : FPMinCanonMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>;
3715-
def : FPMinCanonMaxPat<V_MINMAX_F16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
3716-
def : FPMinCanonMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
3713+
}
3714+
3715+
let True16Predicate = UseFakeTrue16Insts in {
3716+
def : FPMinMaxPat<V_MINMAX_F16_fake16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
3717+
def : FPMinMaxPat<V_MAXMIN_F16_fake16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
3718+
def : FPMinCanonMaxPat<V_MINMAX_F16_fake16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
3719+
def : FPMinCanonMaxPat<V_MAXMIN_F16_fake16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
37173720
}
37183721

37193722
let OtherPredicates = [isGFX9Plus] in {

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1374,8 +1374,8 @@ class VOP3_DOT_Profile_fake16<VOPProfile P, VOP3Features Features = VOP3_REGULAR
13741374
let SubtargetPredicate = isGFX11Plus in {
13751375
defm V_MAXMIN_F32 : VOP3Inst<"v_maxmin_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
13761376
defm V_MINMAX_F32 : VOP3Inst<"v_minmax_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
1377-
defm V_MAXMIN_F16 : VOP3Inst<"v_maxmin_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
1378-
defm V_MINMAX_F16 : VOP3Inst<"v_minmax_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
1377+
defm V_MAXMIN_F16 : VOP3Inst_t16<"v_maxmin_f16", VOP_F16_F16_F16_F16>;
1378+
defm V_MINMAX_F16 : VOP3Inst_t16<"v_minmax_f16", VOP_F16_F16_F16_F16>;
13791379
defm V_MAXMIN_U32 : VOP3Inst<"v_maxmin_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
13801380
defm V_MINMAX_U32 : VOP3Inst<"v_minmax_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
13811381
defm V_MAXMIN_I32 : VOP3Inst<"v_maxmin_i32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -1730,8 +1730,8 @@ defm V_PERMLANE16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25b>;
17301730
defm V_PERMLANEX16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25c>;
17311731
defm V_MAXMIN_F32 : VOP3_Realtriple_gfx11<0x25e>;
17321732
defm V_MINMAX_F32 : VOP3_Realtriple_gfx11<0x25f>;
1733-
defm V_MAXMIN_F16 : VOP3_Realtriple_gfx11<0x260>;
1734-
defm V_MINMAX_F16 : VOP3_Realtriple_gfx11<0x261>;
1733+
defm V_MAXMIN_F16 : VOP3_Realtriple_t16_and_fake16_gfx11<0x260, "v_maxmin_f16">;
1734+
defm V_MINMAX_F16 : VOP3_Realtriple_t16_and_fake16_gfx11<0x261, "v_minmax_f16">;
17351735
defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11_gfx12<0x262>;
17361736
defm V_MINMAX_U32 : VOP3_Realtriple_gfx11_gfx12<0x263>;
17371737
defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11_gfx12<0x264>;

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1909,8 +1909,8 @@ multiclass VOP3_Realtriple_t16_gfx11<bits<10> op, string asmName, string opName
19091909

19101910
multiclass VOP3_Realtriple_t16_and_fake16_gfx11<bits<10> op, string asmName, string opName = NAME,
19111911
string pseudo_mnemonic = "", bit isSingle = 0> {
1912-
defm _t16: VOP3_Realtriple_t16_gfx11<op, opName#"_t16", asmName, pseudo_mnemonic, isSingle>;
1913-
defm _fake16: VOP3_Realtriple_t16_gfx11<op, opName#"_fake16", asmName, pseudo_mnemonic, isSingle>;
1912+
defm _t16: VOP3_Realtriple_t16_gfx11<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
1913+
defm _fake16: VOP3_Realtriple_t16_gfx11<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
19141914
}
19151915

19161916
multiclass VOP3Only_Realtriple_t16_gfx11<bits<10> op, string asmName,

llvm/test/MC/AMDGPU/gfx11_asm_vop3.s

Lines changed: 84 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -3722,50 +3722,62 @@ v_max_u16 v5.l, v255.l, v255.h
37223722
v_max_u16 v255.h, 0xfe0b, vcc_hi
37233723
// GFX11: v_max_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
37243724

3725-
v_maxmin_f16 v5, v1, v2, s3
3726-
// GFX11: v_maxmin_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00]
3725+
v_maxmin_f16 v5.l, v1.l, v2.l, s3
3726+
// GFX11: v_maxmin_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00]
37273727

3728-
v_maxmin_f16 v5, v255, s2, s105
3729-
// GFX11: v_maxmin_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01]
3728+
v_maxmin_f16 v5.l, v255.l, s2, s105
3729+
// GFX11: v_maxmin_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01]
37303730

3731-
v_maxmin_f16 v5, s1, v255, exec_hi
3732-
// GFX11: v_maxmin_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01]
3731+
v_maxmin_f16 v5.l, s1, v255.l, exec_hi
3732+
// GFX11: v_maxmin_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01]
37333733

3734-
v_maxmin_f16 v5, s105, s105, exec_lo
3735-
// GFX11: v_maxmin_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01]
3734+
v_maxmin_f16 v5.l, s105, s105, exec_lo
3735+
// GFX11: v_maxmin_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01]
37363736

3737-
v_maxmin_f16 v5, vcc_lo, ttmp15, v3
3738-
// GFX11: v_maxmin_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04]
3737+
v_maxmin_f16 v5.l, vcc_lo, ttmp15, v3.l
3738+
// GFX11: v_maxmin_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04]
37393739

3740-
v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255
3741-
// GFX11: v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
3740+
v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.l
3741+
// GFX11: v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
37423742

3743-
v_maxmin_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
3744-
// GFX11: v_maxmin_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1]
3743+
v_maxmin_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15|
3744+
// GFX11: v_maxmin_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1]
37453745

3746-
v_maxmin_f16 v5, m0, 0.5, m0
3747-
// GFX11: v_maxmin_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01]
3746+
v_maxmin_f16 v5.l, m0, 0.5, m0
3747+
// GFX11: v_maxmin_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01]
37483748

3749-
v_maxmin_f16 v5, |exec_lo|, -1, vcc_hi
3750-
// GFX11: v_maxmin_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01]
3749+
v_maxmin_f16 v5.l, |exec_lo|, -1, vcc_hi
3750+
// GFX11: v_maxmin_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01]
37513751

3752-
v_maxmin_f16 v5, -|exec_hi|, null, -|vcc_lo|
3753-
// GFX11: v_maxmin_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1]
3752+
v_maxmin_f16 v5.l, -|exec_hi|, null, -|vcc_lo|
3753+
// GFX11: v_maxmin_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1]
37543754

3755-
v_maxmin_f16 v5, null, exec_lo, -|0xfe0b|
3756-
// GFX11: v_maxmin_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
3755+
v_maxmin_f16 v5.l, null, exec_lo, -|0xfe0b|
3756+
// GFX11: v_maxmin_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
37573757

3758-
v_maxmin_f16 v5, -1, -|exec_hi|, -|src_scc|
3759-
// GFX11: v_maxmin_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3]
3758+
v_maxmin_f16 v5.l, -1, -|exec_hi|, -|src_scc|
3759+
// GFX11: v_maxmin_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3]
37603760

3761-
v_maxmin_f16 v5, 0.5, -m0, 0.5 mul:2
3762-
// GFX11: v_maxmin_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b]
3761+
v_maxmin_f16 v5.l, 0.5, -m0, 0.5 mul:2
3762+
// GFX11: v_maxmin_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b]
37633763

3764-
v_maxmin_f16 v5, -src_scc, |vcc_lo|, -1 mul:4
3765-
// GFX11: v_maxmin_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33]
3764+
v_maxmin_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4
3765+
// GFX11: v_maxmin_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33]
37663766

3767-
v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2
3768-
// GFX11: v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
3767+
v_maxmin_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2
3768+
// GFX11: v_maxmin_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
3769+
3770+
v_maxmin_f16 v5.l, v255.h, s2, s105
3771+
// GFX11: v_maxmin_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x60,0xd6,0xff,0x05,0xa4,0x01]
3772+
3773+
v_maxmin_f16 v5.l, s1, v255.h, exec_hi
3774+
// GFX11: v_maxmin_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x60,0xd6,0x01,0xfe,0xff,0x01]
3775+
3776+
v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.h
3777+
// GFX11: v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
3778+
3779+
v_maxmin_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp div:2
3780+
// GFX11: v_maxmin_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
37693781

37703782
v_maxmin_f32 v5, v1, v2, s3
37713783
// GFX11: v_maxmin_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x5e,0xd6,0x01,0x05,0x0e,0x00]
@@ -4799,50 +4811,62 @@ v_min_u16 v5.l, v255.l, v255.h
47994811
v_min_u16 v255.h, 0xfe0b, vcc_hi
48004812
// GFX11: v_min_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
48014813

4802-
v_minmax_f16 v5, v1, v2, s3
4803-
// GFX11: v_minmax_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00]
4814+
v_minmax_f16 v5.l, v1.l, v2.l, s3
4815+
// GFX11: v_minmax_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00]
4816+
4817+
v_minmax_f16 v5.l, v255.l, s2, s105
4818+
// GFX11: v_minmax_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01]
4819+
4820+
v_minmax_f16 v5.l, s1, v255.l, exec_hi
4821+
// GFX11: v_minmax_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01]
4822+
4823+
v_minmax_f16 v5.l, s105, s105, exec_lo
4824+
// GFX11: v_minmax_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01]
4825+
4826+
v_minmax_f16 v5.l, vcc_lo, ttmp15, v3.l
4827+
// GFX11: v_minmax_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04]
48044828

4805-
v_minmax_f16 v5, v255, s2, s105
4806-
// GFX11: v_minmax_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01]
4829+
v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.l
4830+
// GFX11: v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
48074831

4808-
v_minmax_f16 v5, s1, v255, exec_hi
4809-
// GFX11: v_minmax_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01]
4832+
v_minmax_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15|
4833+
// GFX11: v_minmax_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1]
48104834

4811-
v_minmax_f16 v5, s105, s105, exec_lo
4812-
// GFX11: v_minmax_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01]
4835+
v_minmax_f16 v5.l, m0, 0.5, m0
4836+
// GFX11: v_minmax_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01]
48134837

4814-
v_minmax_f16 v5, vcc_lo, ttmp15, v3
4815-
// GFX11: v_minmax_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04]
4838+
v_minmax_f16 v5.l, |exec_lo|, -1, vcc_hi
4839+
// GFX11: v_minmax_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01]
48164840

4817-
v_minmax_f16 v5, vcc_hi, 0xfe0b, v255
4818-
// GFX11: v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
4841+
v_minmax_f16 v5.l, -|exec_hi|, null, -|vcc_lo|
4842+
// GFX11: v_minmax_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1]
48194843

4820-
v_minmax_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
4821-
// GFX11: v_minmax_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1]
4844+
v_minmax_f16 v5.l, null, exec_lo, -|0xfe0b|
4845+
// GFX11: v_minmax_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
48224846

4823-
v_minmax_f16 v5, m0, 0.5, m0
4824-
// GFX11: v_minmax_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01]
4847+
v_minmax_f16 v5.l, -1, -|exec_hi|, -|src_scc|
4848+
// GFX11: v_minmax_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3]
48254849

4826-
v_minmax_f16 v5, |exec_lo|, -1, vcc_hi
4827-
// GFX11: v_minmax_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01]
4850+
v_minmax_f16 v5.l, 0.5, -m0, 0.5 mul:2
4851+
// GFX11: v_minmax_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b]
48284852

4829-
v_minmax_f16 v5, -|exec_hi|, null, -|vcc_lo|
4830-
// GFX11: v_minmax_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1]
4853+
v_minmax_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4
4854+
// GFX11: v_minmax_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33]
48314855

4832-
v_minmax_f16 v5, null, exec_lo, -|0xfe0b|
4833-
// GFX11: v_minmax_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
4856+
v_minmax_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2
4857+
// GFX11: v_minmax_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
48344858

4835-
v_minmax_f16 v5, -1, -|exec_hi|, -|src_scc|
4836-
// GFX11: v_minmax_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3]
4859+
v_minmax_f16 v5.l, v255.h, s2, s105
4860+
// GFX11: v_minmax_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x61,0xd6,0xff,0x05,0xa4,0x01]
48374861

4838-
v_minmax_f16 v5, 0.5, -m0, 0.5 mul:2
4839-
// GFX11: v_minmax_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b]
4862+
v_minmax_f16 v5.l, s1, v255.h, exec_hi
4863+
// GFX11: v_minmax_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x61,0xd6,0x01,0xfe,0xff,0x01]
48404864

4841-
v_minmax_f16 v5, -src_scc, |vcc_lo|, -1 mul:4
4842-
// GFX11: v_minmax_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33]
4865+
v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.h
4866+
// GFX11: v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
48434867

4844-
v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2
4845-
// GFX11: v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
4868+
v_minmax_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp div:2
4869+
// GFX11: v_minmax_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
48464870

48474871
v_minmax_f32 v5, v1, v2, s3
48484872
// GFX11: v_minmax_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x5f,0xd6,0x01,0x05,0x0e,0x00]

0 commit comments

Comments
 (0)