Skip to content

Commit 2903c24

Browse files
committed
AMDGPU: Fix overly conservative immediate operand check
The real legality check is performed later anyway, so this was unnecessarily blocking immediate folds in handled cases. This also stops folding s_fmac_f32 to s_fmamk_f32 in a few tests, but that seems better. The GlobalISel changes look suspicious; it may be mishandling constants for VOP3P instructions.
1 parent 1d3dce0 commit 2903c24

File tree

10 files changed

+25
-49
lines changed

10 files changed

+25
-49
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -830,7 +830,8 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
830830
if (UseOpIdx >= Desc.getNumOperands())
831831
return false;
832832

833-
if (!AMDGPU::isSISrcInlinableOperand(Desc, UseOpIdx))
833+
// Filter out unhandled pseudos.
834+
if (!AMDGPU::isSISrcOperand(Desc, UseOpIdx))
834835
return false;
835836

836837
uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;

llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -920,9 +920,7 @@ define amdgpu_ps i64 @s_andn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1
920920
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
921921
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
922922
; GFX6-NEXT: s_or_b32 s3, s3, s4
923-
; GFX6-NEXT: s_mov_b32 s4, -1
924-
; GFX6-NEXT: s_mov_b32 s5, s4
925-
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
923+
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
926924
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
927925
; GFX6-NEXT: ; return to shader part epilog
928926
;
@@ -962,9 +960,7 @@ define amdgpu_ps i64 @s_andn2_v4i16_commute(<4 x i16> inreg %src0, <4 x i16> inr
962960
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
963961
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
964962
; GFX6-NEXT: s_or_b32 s3, s3, s4
965-
; GFX6-NEXT: s_mov_b32 s4, -1
966-
; GFX6-NEXT: s_mov_b32 s5, s4
967-
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
963+
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
968964
; GFX6-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
969965
; GFX6-NEXT: ; return to shader part epilog
970966
;
@@ -1004,9 +1000,7 @@ define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_use(<4 x i16> inreg %src0, <4
10041000
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
10051001
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
10061002
; GFX6-NEXT: s_or_b32 s3, s3, s4
1007-
; GFX6-NEXT: s_mov_b32 s4, -1
1008-
; GFX6-NEXT: s_mov_b32 s5, s4
1009-
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
1003+
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
10101004
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
10111005
; GFX6-NEXT: ; return to shader part epilog
10121006
;
@@ -1060,9 +1054,7 @@ define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_foldable_use(<4 x i16> inreg
10601054
; GFX6-NEXT: s_lshl_b32 s5, s13, 16
10611055
; GFX6-NEXT: s_and_b32 s6, s12, 0xffff
10621056
; GFX6-NEXT: s_or_b32 s5, s5, s6
1063-
; GFX6-NEXT: s_mov_b32 s6, -1
1064-
; GFX6-NEXT: s_mov_b32 s7, s6
1065-
; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7]
1057+
; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], -1
10661058
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
10671059
; GFX6-NEXT: s_and_b64 s[2:3], s[2:3], s[4:5]
10681060
; GFX6-NEXT: ; return to shader part epilog

llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -919,9 +919,7 @@ define amdgpu_ps i64 @s_orn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1)
919919
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
920920
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
921921
; GFX6-NEXT: s_or_b32 s3, s3, s4
922-
; GFX6-NEXT: s_mov_b32 s4, -1
923-
; GFX6-NEXT: s_mov_b32 s5, s4
924-
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
922+
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
925923
; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
926924
; GFX6-NEXT: ; return to shader part epilog
927925
;
@@ -961,9 +959,7 @@ define amdgpu_ps i64 @s_orn2_v4i16_commute(<4 x i16> inreg %src0, <4 x i16> inre
961959
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
962960
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
963961
; GFX6-NEXT: s_or_b32 s3, s3, s4
964-
; GFX6-NEXT: s_mov_b32 s4, -1
965-
; GFX6-NEXT: s_mov_b32 s5, s4
966-
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
962+
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
967963
; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
968964
; GFX6-NEXT: ; return to shader part epilog
969965
;
@@ -1003,9 +999,7 @@ define amdgpu_ps { i64, i64 } @s_orn2_v4i16_multi_use(<4 x i16> inreg %src0, <4
1003999
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
10041000
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
10051001
; GFX6-NEXT: s_or_b32 s3, s3, s4
1006-
; GFX6-NEXT: s_mov_b32 s4, -1
1007-
; GFX6-NEXT: s_mov_b32 s5, s4
1008-
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
1002+
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
10091003
; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
10101004
; GFX6-NEXT: ; return to shader part epilog
10111005
;
@@ -1059,9 +1053,7 @@ define amdgpu_ps { i64, i64 } @s_orn2_v4i16_multi_foldable_use(<4 x i16> inreg %
10591053
; GFX6-NEXT: s_lshl_b32 s5, s13, 16
10601054
; GFX6-NEXT: s_and_b32 s6, s12, 0xffff
10611055
; GFX6-NEXT: s_or_b32 s5, s5, s6
1062-
; GFX6-NEXT: s_mov_b32 s6, -1
1063-
; GFX6-NEXT: s_mov_b32 s7, s6
1064-
; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7]
1056+
; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], -1
10651057
; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
10661058
; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
10671059
; GFX6-NEXT: ; return to shader part epilog

llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,13 +118,11 @@ define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> in
118118
; GFX7-NEXT: s_xor_b64 s[2:3], s[2:3], s[6:7]
119119
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
120120
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
121-
; GFX7-NEXT: s_mov_b32 s8, -1
122121
; GFX7-NEXT: s_or_b32 s0, s1, s0
123122
; GFX7-NEXT: s_lshl_b32 s1, s3, 16
124123
; GFX7-NEXT: s_and_b32 s2, s2, 0xffff
125-
; GFX7-NEXT: s_mov_b32 s9, s8
126124
; GFX7-NEXT: s_or_b32 s1, s1, s2
127-
; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[8:9]
125+
; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], -1
128126
; GFX7-NEXT: ; return to shader part epilog
129127
;
130128
; GFX8-LABEL: scalar_xnor_v4i16_one_use:

llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,14 @@ define amdgpu_cs <2 x i32> @f() {
55
; CHECK-LABEL: f:
66
; CHECK: ; %bb.0: ; %bb
77
; CHECK-NEXT: s_mov_b32 s4, 0
8+
; CHECK-NEXT: s_mov_b32 s1, 0
89
; CHECK-NEXT: s_mov_b32 s5, s4
910
; CHECK-NEXT: s_mov_b32 s6, s4
1011
; CHECK-NEXT: s_mov_b32 s7, s4
11-
; CHECK-NEXT: s_mov_b32 s0, s4
1212
; CHECK-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
13-
; CHECK-NEXT: s_mov_b32 s1, s4
1413
; CHECK-NEXT: s_waitcnt vmcnt(0)
15-
; CHECK-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[0:1], v[0:1]
14+
; CHECK-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[0:1]
1615
; CHECK-NEXT: v_mov_b32_e32 v1, s4
17-
; CHECK-NEXT: s_mov_b32 s1, 0
1816
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1917
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
2018
; CHECK-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0

llvm/test/CodeGen/AMDGPU/constrained-shift.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -192,10 +192,8 @@ define amdgpu_ps <4 x i32> @s_csh_v4i32(<4 x i32> inreg %a, <4 x i32> inreg %b)
192192
;
193193
; GISEL-LABEL: s_csh_v4i32:
194194
; GISEL: ; %bb.0:
195-
; GISEL-NEXT: s_mov_b32 s8, 31
196-
; GISEL-NEXT: s_mov_b32 s9, s8
197-
; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
198-
; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], s[8:9]
195+
; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], 31
196+
; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], 31
199197
; GISEL-NEXT: s_lshl_b32 s8, s0, s4
200198
; GISEL-NEXT: s_lshl_b32 s9, s1, s5
201199
; GISEL-NEXT: s_lshl_b32 s10, s2, s6

llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ body: |
1313
; CHECK-NEXT: {{ $}}
1414
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1515
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
16-
; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1056964608, [[COPY1]], implicit $mode
16+
; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAC_F32 1056964608, [[COPY]], [[COPY1]], implicit $mode
1717
; CHECK-NEXT: $sgpr0 = COPY %fma
1818
%0:sreg_32 = COPY $sgpr0
1919
%1:sreg_32 = COPY $sgpr1
@@ -33,7 +33,7 @@ body: |
3333
; CHECK-NEXT: {{ $}}
3434
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
3535
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
36-
; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1056964608, [[COPY1]], implicit $mode
36+
; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAC_F32 [[COPY]], 1056964608, [[COPY1]], implicit $mode
3737
; CHECK-NEXT: $sgpr0 = COPY %fma
3838
%0:sreg_32 = COPY $sgpr0
3939
%1:sreg_32 = COPY $sgpr1

llvm/test/CodeGen/AMDGPU/global-saddr-load.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -742,10 +742,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_0x100000001(ptr addrspace(1)
742742
;
743743
; GFX12-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
744744
; GFX12-SDAG: ; %bb.0:
745-
; GFX12-SDAG-NEXT: s_mov_b32 s0, 1
746-
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
747-
; GFX12-SDAG-NEXT: s_mov_b32 s1, s0
748-
; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1]
745+
; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], 1
749746
; GFX12-SDAG-NEXT: s_load_u8 s0, s[0:1], 0x0
750747
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
751748
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0

llvm/test/CodeGen/AMDGPU/packed-fp32.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ define amdgpu_kernel void @fadd_v2_v_v_splat(ptr addrspace(1) %a) {
8787
; GCN-LABEL: {{^}}fadd_v2_v_lit_splat:
8888
; GFX900-COUNT-2: v_add_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
8989
; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 1.0 op_sel_hi:[1,0]{{$}}
90-
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
90+
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 1.0{{$}}
9191
define amdgpu_kernel void @fadd_v2_v_lit_splat(ptr addrspace(1) %a) {
9292
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
9393
%gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
@@ -308,7 +308,7 @@ define amdgpu_kernel void @fmul_v2_v_v_splat(ptr addrspace(1) %a) {
308308
; GCN-LABEL: {{^}}fmul_v2_v_lit_splat:
309309
; GFX900-COUNT-2: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
310310
; PACKED-SDAG: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 4.0 op_sel_hi:[1,0]{{$}}
311-
; PACKED-GISEL: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
311+
; PACKED-GISEL: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 4.0{{$}}
312312
define amdgpu_kernel void @fmul_v2_v_lit_splat(ptr addrspace(1) %a) {
313313
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
314314
%gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
@@ -432,7 +432,7 @@ define amdgpu_kernel void @fma_v2_v_v_splat(ptr addrspace(1) %a) {
432432
; GCN-LABEL: {{^}}fma_v2_v_lit_splat:
433433
; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, 4.0, 1.0
434434
; PACKED-SDAG: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 4.0, 1.0 op_sel_hi:[1,0,0]{{$}}
435-
; PACKED-GISEL: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
435+
; PACKED-GISEL: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 4.0, 1.0{{$}}
436436
define amdgpu_kernel void @fma_v2_v_lit_splat(ptr addrspace(1) %a) {
437437
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
438438
%gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
@@ -556,8 +556,8 @@ bb:
556556
; PACKED-SDAG: v_add_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0
557557
; PACKED-SDAG: v_add_f32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
558558

559-
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
560-
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
559+
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], 0{{$}}
560+
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 0{{$}}
561561
define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) {
562562
bb:
563563
%i12 = fadd <2 x float> zeroinitializer, %arg

llvm/test/CodeGen/AMDGPU/scalar-float-sop2.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ define amdgpu_ps float @_amdgpu_ps_main() {
218218
; GFX1150-NEXT: s_mov_b32 s3, s0
219219
; GFX1150-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x0
220220
; GFX1150-NEXT: s_waitcnt lgkmcnt(0)
221-
; GFX1150-NEXT: s_fmamk_f32 s0, s1, 0x40800000, s0
221+
; GFX1150-NEXT: s_fmac_f32 s0, s1, 4.0
222222
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
223223
; GFX1150-NEXT: v_mov_b32_e32 v0, s0
224224
; GFX1150-NEXT: ; return to shader part epilog
@@ -232,7 +232,7 @@ define amdgpu_ps float @_amdgpu_ps_main() {
232232
; GFX12-NEXT: s_mov_b32 s3, s0
233233
; GFX12-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x0
234234
; GFX12-NEXT: s_wait_kmcnt 0x0
235-
; GFX12-NEXT: s_fmamk_f32 s0, s1, 0x40800000, s0
235+
; GFX12-NEXT: s_fmac_f32 s0, s1, 4.0
236236
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
237237
; GFX12-NEXT: v_mov_b32_e32 v0, s0
238238
; GFX12-NEXT: ; return to shader part epilog

0 commit comments

Comments
 (0)