Skip to content

Commit 9c076c0

Browse files
committed
AMDGPU: Fix overly conservative immediate operand check
The real legality check is performed later anyway, so this was unnecessarily blocking immediate folds in handled cases. This also stops folding s_fmac_f32 to s_fmamk_f32 in a few tests, but that seems better. The GlobalISel changes look suspicious; it may be mishandling constants for VOP3P instructions.
1 parent 4be4133 commit 9c076c0

16 files changed

+85
-182
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -821,7 +821,8 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
821821
if (UseOpIdx >= Desc.getNumOperands())
822822
return false;
823823

824-
if (!AMDGPU::isSISrcInlinableOperand(Desc, UseOpIdx))
824+
// Filter out unhandled pseudos.
825+
if (!AMDGPU::isSISrcOperand(Desc, UseOpIdx))
825826
return false;
826827

827828
uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;

llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -920,9 +920,7 @@ define amdgpu_ps i64 @s_andn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1
920920
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
921921
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
922922
; GFX6-NEXT: s_or_b32 s3, s3, s4
923-
; GFX6-NEXT: s_mov_b32 s4, -1
924-
; GFX6-NEXT: s_mov_b32 s5, s4
925-
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
923+
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
926924
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
927925
; GFX6-NEXT: ; return to shader part epilog
928926
;
@@ -962,9 +960,7 @@ define amdgpu_ps i64 @s_andn2_v4i16_commute(<4 x i16> inreg %src0, <4 x i16> inr
962960
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
963961
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
964962
; GFX6-NEXT: s_or_b32 s3, s3, s4
965-
; GFX6-NEXT: s_mov_b32 s4, -1
966-
; GFX6-NEXT: s_mov_b32 s5, s4
967-
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
963+
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
968964
; GFX6-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
969965
; GFX6-NEXT: ; return to shader part epilog
970966
;
@@ -1004,9 +1000,7 @@ define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_use(<4 x i16> inreg %src0, <4
10041000
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
10051001
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
10061002
; GFX6-NEXT: s_or_b32 s3, s3, s4
1007-
; GFX6-NEXT: s_mov_b32 s4, -1
1008-
; GFX6-NEXT: s_mov_b32 s5, s4
1009-
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
1003+
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
10101004
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
10111005
; GFX6-NEXT: ; return to shader part epilog
10121006
;
@@ -1060,9 +1054,7 @@ define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_foldable_use(<4 x i16> inreg
10601054
; GFX6-NEXT: s_lshl_b32 s5, s13, 16
10611055
; GFX6-NEXT: s_and_b32 s6, s12, 0xffff
10621056
; GFX6-NEXT: s_or_b32 s5, s5, s6
1063-
; GFX6-NEXT: s_mov_b32 s6, -1
1064-
; GFX6-NEXT: s_mov_b32 s7, s6
1065-
; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7]
1057+
; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], -1
10661058
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
10671059
; GFX6-NEXT: s_and_b64 s[2:3], s[2:3], s[4:5]
10681060
; GFX6-NEXT: ; return to shader part epilog

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll

Lines changed: 16 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1769,9 +1769,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
17691769
; GFX9-NEXT: s_mov_b32 s0, 0
17701770
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
17711771
; GFX9-NEXT: s_waitcnt vmcnt(0)
1772-
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
17731772
; GFX9-NEXT: v_mov_b32_e32 v0, 15
1774-
; GFX9-NEXT: s_add_i32 s0, s0, 4
1773+
; GFX9-NEXT: s_movk_i32 s0, 0x3e84
17751774
; GFX9-NEXT: scratch_store_dword off, v0, s0
17761775
; GFX9-NEXT: s_waitcnt vmcnt(0)
17771776
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1786,8 +1785,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
17861785
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
17871786
; GFX10-NEXT: v_mov_b32_e32 v0, 13
17881787
; GFX10-NEXT: v_mov_b32_e32 v1, 15
1789-
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1790-
; GFX10-NEXT: s_add_i32 s0, s0, 4
1788+
; GFX10-NEXT: s_movk_i32 s0, 0x3e84
17911789
; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
17921790
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
17931791
; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1799,11 +1797,10 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
17991797
; GFX942-LABEL: store_load_large_imm_offset_kernel:
18001798
; GFX942: ; %bb.0: ; %bb
18011799
; GFX942-NEXT: v_mov_b32_e32 v0, 13
1802-
; GFX942-NEXT: s_movk_i32 s0, 0x3e80
18031800
; GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
18041801
; GFX942-NEXT: s_waitcnt vmcnt(0)
18051802
; GFX942-NEXT: v_mov_b32_e32 v0, 15
1806-
; GFX942-NEXT: s_add_i32 s0, s0, 4
1803+
; GFX942-NEXT: s_movk_i32 s0, 0x3e84
18071804
; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
18081805
; GFX942-NEXT: s_waitcnt vmcnt(0)
18091806
; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1813,9 +1810,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18131810
; GFX11-LABEL: store_load_large_imm_offset_kernel:
18141811
; GFX11: ; %bb.0: ; %bb
18151812
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1816-
; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1817-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1818-
; GFX11-NEXT: s_add_i32 s0, s0, 4
1813+
; GFX11-NEXT: s_movk_i32 s0, 0x3e84
18191814
; GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
18201815
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
18211816
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -1843,9 +1838,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18431838
; UNALIGNED_GFX9-NEXT: s_mov_b32 s0, 0
18441839
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
18451840
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
1846-
; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
18471841
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
1848-
; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, 4
1842+
; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e84
18491843
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
18501844
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
18511845
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1860,8 +1854,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18601854
; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
18611855
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
18621856
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
1863-
; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
1864-
; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, 4
1857+
; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e84
18651858
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, off offset:4
18661859
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
18671860
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1873,11 +1866,10 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18731866
; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_kernel:
18741867
; UNALIGNED_GFX942: ; %bb.0: ; %bb
18751868
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
1876-
; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
18771869
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
18781870
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
18791871
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
1880-
; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s0, 4
1872+
; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e84
18811873
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
18821874
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
18831875
; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1887,9 +1879,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18871879
; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_kernel:
18881880
; UNALIGNED_GFX11: ; %bb.0: ; %bb
18891881
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1890-
; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
1891-
; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1892-
; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s0, 4
1882+
; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e84
18931883
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
18941884
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
18951885
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -1923,13 +1913,11 @@ define void @store_load_large_imm_offset_foo() {
19231913
; GFX9-LABEL: store_load_large_imm_offset_foo:
19241914
; GFX9: ; %bb.0: ; %bb
19251915
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1926-
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
19271916
; GFX9-NEXT: v_mov_b32_e32 v0, 13
1928-
; GFX9-NEXT: s_add_i32 s1, s32, s0
19291917
; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
19301918
; GFX9-NEXT: s_waitcnt vmcnt(0)
19311919
; GFX9-NEXT: v_mov_b32_e32 v0, 15
1932-
; GFX9-NEXT: s_add_i32 s0, s1, 4
1920+
; GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
19331921
; GFX9-NEXT: scratch_store_dword off, v0, s0
19341922
; GFX9-NEXT: s_waitcnt vmcnt(0)
19351923
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1940,10 +1928,8 @@ define void @store_load_large_imm_offset_foo() {
19401928
; GFX10: ; %bb.0: ; %bb
19411929
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19421930
; GFX10-NEXT: v_mov_b32_e32 v0, 13
1943-
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
19441931
; GFX10-NEXT: v_mov_b32_e32 v1, 15
1945-
; GFX10-NEXT: s_add_i32 s1, s32, s0
1946-
; GFX10-NEXT: s_add_i32 s0, s1, 4
1932+
; GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
19471933
; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
19481934
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
19491935
; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1955,13 +1941,11 @@ define void @store_load_large_imm_offset_foo() {
19551941
; GFX942-LABEL: store_load_large_imm_offset_foo:
19561942
; GFX942: ; %bb.0: ; %bb
19571943
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1958-
; GFX942-NEXT: s_movk_i32 s0, 0x3e80
19591944
; GFX942-NEXT: v_mov_b32_e32 v0, 13
1960-
; GFX942-NEXT: s_add_i32 s1, s32, s0
19611945
; GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
19621946
; GFX942-NEXT: s_waitcnt vmcnt(0)
19631947
; GFX942-NEXT: v_mov_b32_e32 v0, 15
1964-
; GFX942-NEXT: s_add_i32 s0, s1, 4
1948+
; GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
19651949
; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
19661950
; GFX942-NEXT: s_waitcnt vmcnt(0)
19671951
; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1972,10 +1956,7 @@ define void @store_load_large_imm_offset_foo() {
19721956
; GFX11: ; %bb.0: ; %bb
19731957
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19741958
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1975-
; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1976-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1977-
; GFX11-NEXT: s_add_i32 s1, s32, s0
1978-
; GFX11-NEXT: s_add_i32 s0, s1, 4
1959+
; GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
19791960
; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
19801961
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
19811962
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -2004,13 +1985,11 @@ define void @store_load_large_imm_offset_foo() {
20041985
; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo:
20051986
; UNALIGNED_GFX9: ; %bb.0: ; %bb
20061987
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2007-
; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
20081988
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13
2009-
; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, s0
20101989
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
20111990
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
20121991
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
2013-
; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s1, 4
1992+
; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
20141993
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
20151994
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
20161995
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2021,10 +2000,8 @@ define void @store_load_large_imm_offset_foo() {
20212000
; UNALIGNED_GFX10: ; %bb.0: ; %bb
20222001
; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20232002
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
2024-
; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
20252003
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
2026-
; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, s0
2027-
; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s1, 4
2004+
; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
20282005
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
20292006
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
20302007
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -2036,13 +2013,11 @@ define void @store_load_large_imm_offset_foo() {
20362013
; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_foo:
20372014
; UNALIGNED_GFX942: ; %bb.0: ; %bb
20382015
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2039-
; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
20402016
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
2041-
; UNALIGNED_GFX942-NEXT: s_add_i32 s1, s32, s0
20422017
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
20432018
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
20442019
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
2045-
; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s1, 4
2020+
; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
20462021
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
20472022
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
20482023
; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -2053,10 +2028,7 @@ define void @store_load_large_imm_offset_foo() {
20532028
; UNALIGNED_GFX11: ; %bb.0: ; %bb
20542029
; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20552030
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
2056-
; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
2057-
; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2058-
; UNALIGNED_GFX11-NEXT: s_add_i32 s1, s32, s0
2059-
; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s1, 4
2031+
; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
20602032
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
20612033
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
20622034
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc

llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -919,9 +919,7 @@ define amdgpu_ps i64 @s_orn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1)
919919
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
920920
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
921921
; GFX6-NEXT: s_or_b32 s3, s3, s4
922-
; GFX6-NEXT: s_mov_b32 s4, -1
923-
; GFX6-NEXT: s_mov_b32 s5, s4
924-
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
922+
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
925923
; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
926924
; GFX6-NEXT: ; return to shader part epilog
927925
;
@@ -961,9 +959,7 @@ define amdgpu_ps i64 @s_orn2_v4i16_commute(<4 x i16> inreg %src0, <4 x i16> inre
961959
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
962960
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
963961
; GFX6-NEXT: s_or_b32 s3, s3, s4
964-
; GFX6-NEXT: s_mov_b32 s4, -1
965-
; GFX6-NEXT: s_mov_b32 s5, s4
966-
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
962+
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
967963
; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
968964
; GFX6-NEXT: ; return to shader part epilog
969965
;
@@ -1003,9 +999,7 @@ define amdgpu_ps { i64, i64 } @s_orn2_v4i16_multi_use(<4 x i16> inreg %src0, <4
1003999
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
10041000
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
10051001
; GFX6-NEXT: s_or_b32 s3, s3, s4
1006-
; GFX6-NEXT: s_mov_b32 s4, -1
1007-
; GFX6-NEXT: s_mov_b32 s5, s4
1008-
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
1002+
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
10091003
; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
10101004
; GFX6-NEXT: ; return to shader part epilog
10111005
;
@@ -1059,9 +1053,7 @@ define amdgpu_ps { i64, i64 } @s_orn2_v4i16_multi_foldable_use(<4 x i16> inreg %
10591053
; GFX6-NEXT: s_lshl_b32 s5, s13, 16
10601054
; GFX6-NEXT: s_and_b32 s6, s12, 0xffff
10611055
; GFX6-NEXT: s_or_b32 s5, s5, s6
1062-
; GFX6-NEXT: s_mov_b32 s6, -1
1063-
; GFX6-NEXT: s_mov_b32 s7, s6
1064-
; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7]
1056+
; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], -1
10651057
; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
10661058
; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
10671059
; GFX6-NEXT: ; return to shader part epilog

llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,13 +118,11 @@ define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> in
118118
; GFX7-NEXT: s_xor_b64 s[2:3], s[2:3], s[6:7]
119119
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
120120
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
121-
; GFX7-NEXT: s_mov_b32 s8, -1
122121
; GFX7-NEXT: s_or_b32 s0, s1, s0
123122
; GFX7-NEXT: s_lshl_b32 s1, s3, 16
124123
; GFX7-NEXT: s_and_b32 s2, s2, 0xffff
125-
; GFX7-NEXT: s_mov_b32 s9, s8
126124
; GFX7-NEXT: s_or_b32 s1, s1, s2
127-
; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[8:9]
125+
; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], -1
128126
; GFX7-NEXT: ; return to shader part epilog
129127
;
130128
; GFX8-LABEL: scalar_xnor_v4i16_one_use:

llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,14 @@ define amdgpu_cs <2 x i32> @f() {
55
; CHECK-LABEL: f:
66
; CHECK: ; %bb.0: ; %bb
77
; CHECK-NEXT: s_mov_b32 s4, 0
8+
; CHECK-NEXT: s_mov_b32 s1, 0
89
; CHECK-NEXT: s_mov_b32 s5, s4
910
; CHECK-NEXT: s_mov_b32 s6, s4
1011
; CHECK-NEXT: s_mov_b32 s7, s4
11-
; CHECK-NEXT: s_mov_b32 s0, s4
1212
; CHECK-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
13-
; CHECK-NEXT: s_mov_b32 s1, s4
1413
; CHECK-NEXT: s_waitcnt vmcnt(0)
15-
; CHECK-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[0:1], v[0:1]
14+
; CHECK-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[0:1]
1615
; CHECK-NEXT: v_mov_b32_e32 v1, s4
17-
; CHECK-NEXT: s_mov_b32 s1, 0
1816
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1917
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
2018
; CHECK-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0

llvm/test/CodeGen/AMDGPU/constrained-shift.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -192,10 +192,8 @@ define amdgpu_ps <4 x i32> @s_csh_v4i32(<4 x i32> inreg %a, <4 x i32> inreg %b)
192192
;
193193
; GISEL-LABEL: s_csh_v4i32:
194194
; GISEL: ; %bb.0:
195-
; GISEL-NEXT: s_mov_b32 s8, 31
196-
; GISEL-NEXT: s_mov_b32 s9, s8
197-
; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
198-
; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], s[8:9]
195+
; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], 31
196+
; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], 31
199197
; GISEL-NEXT: s_lshl_b32 s8, s0, s4
200198
; GISEL-NEXT: s_lshl_b32 s9, s1, s5
201199
; GISEL-NEXT: s_lshl_b32 s10, s2, s6

0 commit comments

Comments
 (0)