Skip to content

Commit 6e3c24f

Browse files
authored
[DAG] Combine (sext (sext_in_reg x)) to (sext_in_reg (any_extend x)) (#132386)
1 parent ade2276 commit 6e3c24f

File tree

3 files changed

+37
-38
lines changed

3 files changed

+37
-38
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13940,14 +13940,22 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
1394013940
return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
1394113941
N0.getOperand(0));
1394213942

13943-
// fold (sext (sext_inreg x)) -> (sext (trunc x))
1394413943
if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
1394513944
SDValue N00 = N0.getOperand(0);
1394613945
EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
13947-
if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) &&
13948-
(!LegalTypes || TLI.isTypeLegal(ExtVT))) {
13949-
SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
13950-
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
13946+
if (N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) {
13947+
// fold (sext (sext_inreg x)) -> (sext (trunc x))
13948+
if ((!LegalTypes || TLI.isTypeLegal(ExtVT))) {
13949+
SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
13950+
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
13951+
}
13952+
13953+
// If the truncated type (ExtVT) isn't legal, try to fold to (sext_inreg (anyext x)) instead
13954+
if ((!LegalTypes || TLI.isTypeLegal(VT)) && N0.hasOneUse()) {
13955+
SDValue ExtSrc = DAG.getAnyExtOrTrunc(N00, DL, VT);
13956+
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, ExtSrc,
13957+
N0->getOperand(1));
13958+
}
1395113959
}
1395213960
}
1395313961

llvm/test/CodeGen/AMDGPU/permute_i8.ll

Lines changed: 23 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1791,25 +1791,21 @@ define hidden void @sitofp_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in
17911791
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17921792
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
17931793
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
1794-
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
1795-
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
17961794
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
17971795
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
1798-
; GFX10-NEXT: global_load_dword v4, v[2:3], off
1799-
; GFX10-NEXT: global_load_dword v9, v[0:1], off
1796+
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
1797+
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
1798+
; GFX10-NEXT: global_load_dword v4, v[0:1], off
1799+
; GFX10-NEXT: global_load_dword v9, v[2:3], off
18001800
; GFX10-NEXT: s_waitcnt vmcnt(1)
1801-
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v4
1802-
; GFX10-NEXT: s_waitcnt vmcnt(0)
1803-
; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v9
1804-
; GFX10-NEXT: v_ashrrev_i16 v2, 8, v9
1805-
; GFX10-NEXT: v_ashrrev_i16 v3, 8, v4
1806-
; GFX10-NEXT: v_perm_b32 v4, v4, v9, 0x6010205
1807-
; GFX10-NEXT: v_bfe_i32 v10, v0, 0, 8
1808-
; GFX10-NEXT: v_bfe_i32 v1, v1, 0, 8
1809-
; GFX10-NEXT: v_cvt_f32_i32_sdwa v2, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1810-
; GFX10-NEXT: v_cvt_f32_i32_sdwa v0, sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1811-
; GFX10-NEXT: v_cvt_f32_i32_sdwa v3, sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1812-
; GFX10-NEXT: v_cvt_f32_i32_sdwa v1, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1801+
; GFX10-NEXT: v_ashrrev_i16 v0, 8, v4
1802+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1803+
; GFX10-NEXT: v_ashrrev_i16 v10, 8, v9
1804+
; GFX10-NEXT: v_cvt_f32_i32_sdwa v3, sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2
1805+
; GFX10-NEXT: v_cvt_f32_i32_sdwa v1, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2
1806+
; GFX10-NEXT: v_perm_b32 v4, v9, v4, 0x6010205
1807+
; GFX10-NEXT: v_cvt_f32_i32_sdwa v2, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1808+
; GFX10-NEXT: v_cvt_f32_i32_sdwa v0, sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
18131809
; GFX10-NEXT: global_store_dwordx4 v[7:8], v[0:3], off
18141810
; GFX10-NEXT: global_store_dword v[5:6], v4, off
18151811
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1821,24 +1817,20 @@ define hidden void @sitofp_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in
18211817
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v4
18221818
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v4
18231819
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
1824-
; GFX9-NEXT: global_load_dword v9, v[0:1], off
1825-
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v4
1826-
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
1820+
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
1821+
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
18271822
; GFX9-NEXT: global_load_dword v4, v[0:1], off
1823+
; GFX9-NEXT: global_load_dword v9, v[2:3], off
18281824
; GFX9-NEXT: s_mov_b32 s4, 0x6010205
18291825
; GFX9-NEXT: s_waitcnt vmcnt(1)
1830-
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v9
1831-
; GFX9-NEXT: v_ashrrev_i16_e32 v1, 8, v9
1832-
; GFX9-NEXT: v_bfe_i32 v10, v0, 0, 8
1833-
; GFX9-NEXT: s_waitcnt vmcnt(0)
1834-
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v4
1835-
; GFX9-NEXT: v_ashrrev_i16_e32 v3, 8, v4
1836-
; GFX9-NEXT: v_bfe_i32 v11, v2, 0, 8
1837-
; GFX9-NEXT: v_cvt_f32_i32_sdwa v2, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1838-
; GFX9-NEXT: v_cvt_f32_i32_sdwa v0, sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1839-
; GFX9-NEXT: v_cvt_f32_i32_sdwa v3, sext(v11) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1840-
; GFX9-NEXT: v_cvt_f32_i32_sdwa v1, sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1841-
; GFX9-NEXT: v_perm_b32 v4, v4, v9, s4
1826+
; GFX9-NEXT: v_ashrrev_i16_e32 v0, 8, v4
1827+
; GFX9-NEXT: s_waitcnt vmcnt(0)
1828+
; GFX9-NEXT: v_ashrrev_i16_e32 v10, 8, v9
1829+
; GFX9-NEXT: v_cvt_f32_i32_sdwa v3, sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2
1830+
; GFX9-NEXT: v_cvt_f32_i32_sdwa v1, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2
1831+
; GFX9-NEXT: v_cvt_f32_i32_sdwa v2, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1832+
; GFX9-NEXT: v_cvt_f32_i32_sdwa v0, sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1833+
; GFX9-NEXT: v_perm_b32 v4, v9, v4, s4
18421834
; GFX9-NEXT: global_store_dwordx4 v[7:8], v[0:3], off
18431835
; GFX9-NEXT: global_store_dword v[5:6], v4, off
18441836
; GFX9-NEXT: s_waitcnt vmcnt(0)

llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,7 @@ define amdgpu_kernel void @s_sint_to_fp_i8_to_f64(ptr addrspace(1) %out, i8 %in)
196196
; VI-NEXT: s_load_dword s2, s[8:9], 0x8
197197
; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
198198
; VI-NEXT: s_waitcnt lgkmcnt(0)
199-
; VI-NEXT: s_bfe_i32 s2, s2, 0x80000
200-
; VI-NEXT: s_sext_i32_i16 s2, s2
199+
; VI-NEXT: s_sext_i32_i8 s2, s2
201200
; VI-NEXT: v_cvt_f64_i32_e32 v[0:1], s2
202201
; VI-NEXT: v_mov_b32_e32 v3, s1
203202
; VI-NEXT: v_mov_b32_e32 v2, s0

0 commit comments

Comments
 (0)