Skip to content

Commit 209094e

Browse files
committed
AMDGPU/GlobalISel: Start matching s_lshlN_add_u32 instructions
Use a hack to only enable this for GlobalISel. Technically this also works with SelectionDAG, but the divergence selection isn't reliable enough there and a few cases fail, and I have no desire to spend time writing the manual expansion code for it. The DAG actually does a better job, since it catches the use of v_add_lshl_u32 in the mixed SGPR/VGPR cases.
1 parent e4dfc9f commit 209094e

File tree

3 files changed

+432
-4
lines changed

3 files changed

+432
-4
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,16 @@ def add_ctpop : PatFrag <
630630
(add (ctpop $src0), $src1)
631631
>;
632632

633+
foreach I = 1-4 in {
634+
def shl#I#_add : PatFrag <
635+
(ops node:$src0, node:$src1),
636+
(add (shl_oneuse $src0, (i32 I)), $src1)> {
637+
// FIXME: Poor substitute for disabling pattern in SelectionDAG
638+
let PredicateCode = [{return false;}];
639+
let GISelPredicateCode = [{return true;}];
640+
}
641+
}
642+
633643
multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
634644
SDTypeProfile tc = SDTAtomic2,
635645
bit IsInt = 1> {

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -613,10 +613,18 @@ let SubtargetPredicate = isGFX9Plus in {
613613
def S_PACK_HH_B32_B16 : SOP2_32<"s_pack_hh_b32_b16">;
614614

615615
let Defs = [SCC] in {
616-
def S_LSHL1_ADD_U32 : SOP2_32<"s_lshl1_add_u32">;
617-
def S_LSHL2_ADD_U32 : SOP2_32<"s_lshl2_add_u32">;
618-
def S_LSHL3_ADD_U32 : SOP2_32<"s_lshl3_add_u32">;
619-
def S_LSHL4_ADD_U32 : SOP2_32<"s_lshl4_add_u32">;
616+
def S_LSHL1_ADD_U32 : SOP2_32<"s_lshl1_add_u32",
617+
[(set i32:$sdst, (shl1_add SSrc_b32:$src0, SSrc_b32:$src1))]
618+
>;
619+
def S_LSHL2_ADD_U32 : SOP2_32<"s_lshl2_add_u32",
620+
[(set i32:$sdst, (shl2_add SSrc_b32:$src0, SSrc_b32:$src1))]
621+
>;
622+
def S_LSHL3_ADD_U32 : SOP2_32<"s_lshl3_add_u32",
623+
[(set i32:$sdst, (shl3_add SSrc_b32:$src0, SSrc_b32:$src1))]
624+
>;
625+
def S_LSHL4_ADD_U32 : SOP2_32<"s_lshl4_add_u32",
626+
[(set i32:$sdst, (shl4_add SSrc_b32:$src0, SSrc_b32:$src1))]
627+
>;
620628
} // End Defs = [SCC]
621629

622630
let isCommutable = 1 in {

0 commit comments

Comments
 (0)