Skip to content

Commit 0f3aeca

Browse files
authored
[AMDGPU][True16][CodeGen] Update and/or/xor codegen pattern for i16 (#121835)
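In the true16 flow, remove the 32-bit and/or/xor selection patterns for i16: they are now restricted to the NotHasTrue16BitInsts and UseFakeTrue16Insts predicates, while the v2i16 patterns remain unconditional.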
1 parent a39aaf3 commit 0f3aeca

File tree

3 files changed

+25
-12
lines changed

3 files changed

+25
-12
lines changed

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1261,23 +1261,39 @@ class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
12611261
$src)
12621262
>;
12631263

1264-
foreach vt = [i16, v2i16] in {
1264+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
1265+
let True16Predicate = p in {
12651266
def : GCNPat <
1266-
(and vt:$src0, vt:$src1),
1267+
(and i16:$src0, i16:$src1),
12671268
(V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
12681269
>;
12691270

12701271
def : GCNPat <
1271-
(or vt:$src0, vt:$src1),
1272+
(or i16:$src0, i16:$src1),
12721273
(V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
12731274
>;
12741275

12751276
def : GCNPat <
1276-
(xor vt:$src0, vt:$src1),
1277+
(xor i16:$src0, i16:$src1),
12771278
(V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
12781279
>;
12791280
}
12801281

1282+
def : GCNPat <
1283+
(and v2i16:$src0, v2i16:$src1),
1284+
(V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
1285+
>;
1286+
1287+
def : GCNPat <
1288+
(or v2i16:$src0, v2i16:$src1),
1289+
(V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
1290+
>;
1291+
1292+
def : GCNPat <
1293+
(xor v2i16:$src0, v2i16:$src1),
1294+
(V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
1295+
>;
1296+
12811297
let Predicates = [Has16BitInsts, isGFX8GFX9] in {
12821298

12831299
// Undo sub x, c -> add x, -c canonicalization since c is more likely

llvm/test/CodeGen/AMDGPU/uaddsat.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,10 @@ define i8 @v_uaddsat_i8(i8 %lhs, i8 %rhs) {
4242
; GFX11-TRUE16-LABEL: v_uaddsat_i8:
4343
; GFX11-TRUE16: ; %bb.0:
4444
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1
46-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0
47-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
48-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
45+
; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
46+
; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
47+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4948
; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v0.h
50-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
5149
; GFX11-TRUE16-NEXT: v_min_u16 v0.l, 0xff, v0.l
5250
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
5351
;

llvm/test/CodeGen/AMDGPU/usubsat.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,8 @@ define i8 @v_usubsat_i8(i8 %lhs, i8 %rhs) {
3939
; GFX11-TRUE16-LABEL: v_usubsat_i8:
4040
; GFX11-TRUE16: ; %bb.0:
4141
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1
43-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0
44-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
42+
; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
43+
; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
4544
; GFX11-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, v0.h clamp
4645
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
4746
;

0 commit comments

Comments
 (0)