Skip to content

Commit 5cc033b

Browse files
authored
[AMDGPU][True16][CodeGen] fshr true16 pattern (#129085)
Add a true16 pattern for fshr. GlobalISel will be enabled later, once merge_value selection is supported in true16 mode.
1 parent 1b46db7 commit 5cc033b

File tree

3 files changed

+948
-196
lines changed

3 files changed

+948
-196
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2511,6 +2511,33 @@ def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
25112511
(i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
25122512
} // end True16Predicate = NotHasTrue16BitInsts
25132513

2514+
// Selection patterns used when True16Predicate is UseRealTrue16Insts.
// Each pattern below selects V_ALIGNBIT_B32_t16_e64 with all source
// modifiers, clamp and op_sel set to 0, and passes the shift/rotate amount
// as the lo16 subregister extracted from the 32-bit amount operand.
let True16Predicate = UseRealTrue16Insts in {
2515+
// rotr: the rotated value $src0 is supplied as both src0 and src1.
def : GCNPat <
2516+
(rotr i32:$src0, i32:$src1),
2517+
(V_ALIGNBIT_B32_t16_e64 /* src0_modifiers */ 0, $src0,
2518+
/* src1_modifiers */ 0, $src0,
2519+
/* src2_modifiers */ 0,
2520+
(EXTRACT_SUBREG $src1, lo16),
2521+
/* clamp */ 0, /* op_sel */ 0)
2522+
>;
2523+
2524+
// trunc of an i64 srl by a ShiftAmt32Imm amount: sub1 of $src0 is passed
// as src0 and sub0 of $src0 as src1.
def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
2525+
(V_ALIGNBIT_B32_t16_e64 0, /* src0_modifiers */
2526+
(i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
2527+
0, /* src1_modifiers */
2528+
(i32 (EXTRACT_SUBREG (i64 $src0), sub0)),
2529+
0, /* src2_modifiers */
2530+
(i16 (EXTRACT_SUBREG VGPR_32:$src1, lo16)),
2531+
/* clamp */ 0, /* op_sel */ 0)>;
2532+
2533+
// fshr: $src0 and $src1 are passed through unchanged as src0 and src1;
// $src2 is passed as its lo16 subregister.
def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
2534+
(V_ALIGNBIT_B32_t16_e64 /* src0_modifiers */ 0, $src0,
2535+
/* src1_modifiers */ 0, $src1,
2536+
/* src2_modifiers */ 0,
2537+
(EXTRACT_SUBREG VGPR_32:$src2, lo16),
2538+
/* clamp */ 0, /* op_sel */ 0)>;
2539+
} // end True16Predicate = UseRealTrue16Insts
2540+
25142541
let True16Predicate = UseFakeTrue16Insts in {
25152542
def : GCNPat <
25162543
(rotr i32:$src0, i32:$src1),

llvm/test/CodeGen/AMDGPU/bswap.ll

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -737,25 +737,15 @@ define i64 @v_bswap_i48(i64 %src) {
737737
; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v2
738738
; VI-NEXT: s_setpc_b64 s[30:31]
739739
;
740-
; GFX11-REAL16-LABEL: v_bswap_i48:
741-
; GFX11-REAL16: ; %bb.0:
742-
; GFX11-REAL16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
743-
; GFX11-REAL16-NEXT: v_perm_b32 v2, 0, v0, 0x10203
744-
; GFX11-REAL16-NEXT: v_perm_b32 v1, 0, v1, 0x10203
745-
; GFX11-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
746-
; GFX11-REAL16-NEXT: v_lshrrev_b64 v[0:1], 16, v[1:2]
747-
; GFX11-REAL16-NEXT: v_lshrrev_b32_e32 v1, 16, v2
748-
; GFX11-REAL16-NEXT: s_setpc_b64 s[30:31]
749-
;
750-
; GFX11-FAKE16-LABEL: v_bswap_i48:
751-
; GFX11-FAKE16: ; %bb.0:
752-
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
753-
; GFX11-FAKE16-NEXT: v_perm_b32 v2, 0, v0, 0x10203
754-
; GFX11-FAKE16-NEXT: v_perm_b32 v0, 0, v1, 0x10203
755-
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
756-
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2
757-
; GFX11-FAKE16-NEXT: v_alignbit_b32 v0, v2, v0, 16
758-
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
740+
; GFX11-LABEL: v_bswap_i48:
741+
; GFX11: ; %bb.0:
742+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
743+
; GFX11-NEXT: v_perm_b32 v2, 0, v0, 0x10203
744+
; GFX11-NEXT: v_perm_b32 v0, 0, v1, 0x10203
745+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
746+
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v2
747+
; GFX11-NEXT: v_alignbit_b32 v0, v2, v0, 16
748+
; GFX11-NEXT: s_setpc_b64 s[30:31]
759749
%trunc = trunc i64 %src to i48
760750
%bswap = call i48 @llvm.bswap.i48(i48 %trunc)
761751
%zext = zext i48 %bswap to i64

0 commit comments

Comments
 (0)