Skip to content

Commit 92ba7e3

Browse files
authored
AMDGPU/GlobalISel: Do not try to form v_bitop3_b32 for SGPR results (#117940)
1 parent 9a0f251 commit 92ba7e3

File tree

2 files changed

+121
-51
lines changed

2 files changed

+121
-51
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3764,10 +3764,15 @@ bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
37643764
if (!Subtarget->hasBitOp3Insts())
37653765
return false;
37663766

3767+
Register DstReg = MI.getOperand(0).getReg();
3768+
const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3769+
const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
3770+
if (!IsVALU)
3771+
return false;
3772+
37673773
SmallVector<Register, 3> Src;
37683774
uint8_t TTbl;
37693775
unsigned NumOpcodes;
3770-
Register DstReg = MI.getOperand(0).getReg();
37713776

37723777
std::tie(NumOpcodes, TTbl) = BitOp3_Op(DstReg, Src, *MRI);
37733778

@@ -3776,13 +3781,10 @@ bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
37763781
if (NumOpcodes < 2 || Src.empty())
37773782
return false;
37783783

3779-
const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3780-
const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
3781-
37823784
// For a uniform case threshold should be higher to account for moves between
37833785
// VGPRs and SGPRs. It needs one operand in a VGPR, rest two can be in SGPRs
37843786
// and a readfirstlane after.
3785-
if (NumOpcodes < 4 && !IsVALU)
3787+
if (NumOpcodes < 4)
37863788
return false;
37873789

37883790
bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32);

llvm/test/CodeGen/AMDGPU/bitop3.ll

Lines changed: 114 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -march=amdgcn -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-SDAG %s
3-
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-GISEL %s
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn-- -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-SDAG %s
3+
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-GISEL %s
44

55
; ========= Single bit functions =========
66

@@ -45,10 +45,17 @@ define amdgpu_ps float @not_and_and_not_and(i32 %a, i32 %b, i32 %c) {
4545
}
4646

4747
define amdgpu_ps float @not_and_and_and(i32 %a, i32 %b, i32 %c) {
48-
; GCN-LABEL: not_and_and_and:
49-
; GCN: ; %bb.0:
50-
; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:8
51-
; GCN-NEXT: ; return to shader part epilog
48+
; GFX950-SDAG-LABEL: not_and_and_and:
49+
; GFX950-SDAG: ; %bb.0:
50+
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:8
51+
; GFX950-SDAG-NEXT: ; return to shader part epilog
52+
;
53+
; GFX950-GISEL-LABEL: not_and_and_and:
54+
; GFX950-GISEL: ; %bb.0:
55+
; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0
56+
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
57+
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
58+
; GFX950-GISEL-NEXT: ; return to shader part epilog
5259
%nota = xor i32 %a, -1
5360
%and1 = and i32 %nota, %c
5461
%and2 = and i32 %and1, %b
@@ -70,10 +77,17 @@ define amdgpu_ps float @and_not_and_not_and(i32 %a, i32 %b, i32 %c) {
7077
}
7178

7279
define amdgpu_ps float @and_not_and_and(i32 %a, i32 %b, i32 %c) {
73-
; GCN-LABEL: and_not_and_and:
74-
; GCN: ; %bb.0:
75-
; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x20
76-
; GCN-NEXT: ; return to shader part epilog
80+
; GFX950-SDAG-LABEL: and_not_and_and:
81+
; GFX950-SDAG: ; %bb.0:
82+
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x20
83+
; GFX950-SDAG-NEXT: ; return to shader part epilog
84+
;
85+
; GFX950-GISEL-LABEL: and_not_and_and:
86+
; GFX950-GISEL: ; %bb.0:
87+
; GFX950-GISEL-NEXT: v_not_b32_e32 v1, v1
88+
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
89+
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
90+
; GFX950-GISEL-NEXT: ; return to shader part epilog
7791
%notb = xor i32 %b, -1
7892
%and1 = and i32 %a, %c
7993
%and2 = and i32 %and1, %notb
@@ -82,10 +96,17 @@ define amdgpu_ps float @and_not_and_and(i32 %a, i32 %b, i32 %c) {
8296
}
8397

8498
define amdgpu_ps float @and_and_not_and(i32 %a, i32 %b, i32 %c) {
85-
; GCN-LABEL: and_and_not_and:
86-
; GCN: ; %bb.0:
87-
; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x40
88-
; GCN-NEXT: ; return to shader part epilog
99+
; GFX950-SDAG-LABEL: and_and_not_and:
100+
; GFX950-SDAG: ; %bb.0:
101+
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x40
102+
; GFX950-SDAG-NEXT: ; return to shader part epilog
103+
;
104+
; GFX950-GISEL-LABEL: and_and_not_and:
105+
; GFX950-GISEL: ; %bb.0:
106+
; GFX950-GISEL-NEXT: v_not_b32_e32 v2, v2
107+
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
108+
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
109+
; GFX950-GISEL-NEXT: ; return to shader part epilog
89110
%notc = xor i32 %c, -1
90111
%and1 = and i32 %a, %notc
91112
%and2 = and i32 %and1, %b
@@ -94,10 +115,16 @@ define amdgpu_ps float @and_and_not_and(i32 %a, i32 %b, i32 %c) {
94115
}
95116

96117
define amdgpu_ps float @and_and_and(i32 %a, i32 %b, i32 %c) {
97-
; GCN-LABEL: and_and_and:
98-
; GCN: ; %bb.0:
99-
; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80
100-
; GCN-NEXT: ; return to shader part epilog
118+
; GFX950-SDAG-LABEL: and_and_and:
119+
; GFX950-SDAG: ; %bb.0:
120+
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80
121+
; GFX950-SDAG-NEXT: ; return to shader part epilog
122+
;
123+
; GFX950-GISEL-LABEL: and_and_and:
124+
; GFX950-GISEL: ; %bb.0:
125+
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
126+
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
127+
; GFX950-GISEL-NEXT: ; return to shader part epilog
101128
%and1 = and i32 %a, %c
102129
%and2 = and i32 %and1, %b
103130
%ret_cast = bitcast i32 %and2 to float
@@ -107,21 +134,34 @@ define amdgpu_ps float @and_and_and(i32 %a, i32 %b, i32 %c) {
107134
; ========= Multi bit functions =========
108135

109136
define amdgpu_ps float @test_12(i32 %a, i32 %b) {
110-
; GCN-LABEL: test_12:
111-
; GCN: ; %bb.0:
112-
; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc
113-
; GCN-NEXT: ; return to shader part epilog
137+
; GFX950-SDAG-LABEL: test_12:
138+
; GFX950-SDAG: ; %bb.0:
139+
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc
140+
; GFX950-SDAG-NEXT: ; return to shader part epilog
141+
;
142+
; GFX950-GISEL-LABEL: test_12:
143+
; GFX950-GISEL: ; %bb.0:
144+
; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0
145+
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
146+
; GFX950-GISEL-NEXT: ; return to shader part epilog
114147
%nota = xor i32 %a, -1
115148
%and1 = and i32 %nota, %b
116149
%ret_cast = bitcast i32 %and1 to float
117150
ret float %ret_cast
118151
}
119152

120153
define amdgpu_ps float @test_63(i32 %a, i32 %b) {
121-
; GCN-LABEL: test_63:
122-
; GCN: ; %bb.0:
123-
; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0x3f
124-
; GCN-NEXT: ; return to shader part epilog
154+
; GFX950-SDAG-LABEL: test_63:
155+
; GFX950-SDAG: ; %bb.0:
156+
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0x3f
157+
; GFX950-SDAG-NEXT: ; return to shader part epilog
158+
;
159+
; GFX950-GISEL-LABEL: test_63:
160+
; GFX950-GISEL: ; %bb.0:
161+
; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0
162+
; GFX950-GISEL-NEXT: v_not_b32_e32 v1, v1
163+
; GFX950-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
164+
; GFX950-GISEL-NEXT: ; return to shader part epilog
125165
%nota = xor i32 %a, -1
126166
%notb = xor i32 %b, -1
127167
%or = or i32 %nota, %notb
@@ -143,10 +183,17 @@ define amdgpu_ps float @test_59(i32 %a, i32 %b, i32 %c) {
143183
}
144184

145185
define amdgpu_ps float @test_126(i32 %a, i32 %b, i32 %c) {
146-
; GCN-LABEL: test_126:
147-
; GCN: ; %bb.0:
148-
; GCN-NEXT: v_bitop3_b32 v0, v0, v2, v1 bitop3:0x7e
149-
; GCN-NEXT: ; return to shader part epilog
186+
; GFX950-SDAG-LABEL: test_126:
187+
; GFX950-SDAG: ; %bb.0:
188+
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v2, v1 bitop3:0x7e
189+
; GFX950-SDAG-NEXT: ; return to shader part epilog
190+
;
191+
; GFX950-GISEL-LABEL: test_126:
192+
; GFX950-GISEL: ; %bb.0:
193+
; GFX950-GISEL-NEXT: v_xor_b32_e32 v1, v0, v1
194+
; GFX950-GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
195+
; GFX950-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
196+
; GFX950-GISEL-NEXT: ; return to shader part epilog
150197
%xor1 = xor i32 %a, %b
151198
%xor2 = xor i32 %a, %c
152199
%or = or i32 %xor1, %xor2
@@ -167,9 +214,9 @@ define amdgpu_ps float @test_12_src_overflow(i32 %a, i32 %b, i32 %c) {
167214
;
168215
; GFX950-GISEL-LABEL: test_12_src_overflow:
169216
; GFX950-GISEL: ; %bb.0:
170-
; GFX950-GISEL-NEXT: v_bitop3_b32 v3, v0, v2, v0 bitop3:0xc
171-
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:3
172-
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v3, v1, v0 bitop3:0xc8
217+
; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0
218+
; GFX950-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v0
219+
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
173220
; GFX950-GISEL-NEXT: ; return to shader part epilog
174221
%nota = xor i32 %a, -1
175222
%notc = xor i32 %c, -1
@@ -185,13 +232,27 @@ define amdgpu_ps float @test_12_src_overflow(i32 %a, i32 %b, i32 %c) {
185232
; This could be a single LOP3 operation with tbl = 100, but the Src vector is exhausted during the search.
186233

187234
define amdgpu_ps float @test_100_src_overflow(i32 %a, i32 %b, i32 %c) {
188-
; GCN-LABEL: test_100_src_overflow:
189-
; GCN: ; %bb.0:
190-
; GCN-NEXT: v_bitop3_b32 v3, v1, v2, v0 bitop3:0x10
191-
; GCN-NEXT: v_bitop3_b32 v4, v0, v2, v1 bitop3:0x40
192-
; GCN-NEXT: v_bitop3_b32 v0, v1, v2, v0 bitop3:0x20
193-
; GCN-NEXT: v_or3_b32 v0, v3, v4, v0
194-
; GCN-NEXT: ; return to shader part epilog
235+
; GFX950-SDAG-LABEL: test_100_src_overflow:
236+
; GFX950-SDAG: ; %bb.0:
237+
; GFX950-SDAG-NEXT: v_bitop3_b32 v3, v1, v2, v0 bitop3:0x10
238+
; GFX950-SDAG-NEXT: v_bitop3_b32 v4, v0, v2, v1 bitop3:0x40
239+
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v1, v2, v0 bitop3:0x20
240+
; GFX950-SDAG-NEXT: v_or3_b32 v0, v3, v4, v0
241+
; GFX950-SDAG-NEXT: ; return to shader part epilog
242+
;
243+
; GFX950-GISEL-LABEL: test_100_src_overflow:
244+
; GFX950-GISEL: ; %bb.0:
245+
; GFX950-GISEL-NEXT: v_or_b32_e32 v3, v2, v0
246+
; GFX950-GISEL-NEXT: v_not_b32_e32 v3, v3
247+
; GFX950-GISEL-NEXT: v_not_b32_e32 v4, v1
248+
; GFX950-GISEL-NEXT: v_and_b32_e32 v3, v1, v3
249+
; GFX950-GISEL-NEXT: v_and_b32_e32 v4, v0, v4
250+
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v1, v0
251+
; GFX950-GISEL-NEXT: v_not_b32_e32 v1, v2
252+
; GFX950-GISEL-NEXT: v_and_b32_e32 v4, v4, v2
253+
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
254+
; GFX950-GISEL-NEXT: v_or3_b32 v0, v3, v4, v0
255+
; GFX950-GISEL-NEXT: ; return to shader part epilog
195256
%or1 = or i32 %c, %a
196257
%not1 = xor i32 %or1, -1
197258
%and1 = and i32 %b, %not1
@@ -260,12 +321,19 @@ define amdgpu_ps float @uniform_3_op(i32 inreg %a, i32 inreg %b, i32 inreg %c) {
260321
}
261322

262323
define amdgpu_ps float @uniform_4_op(i32 inreg %a, i32 inreg %b, i32 inreg %c) {
263-
; GCN-LABEL: uniform_4_op:
264-
; GCN: ; %bb.0:
265-
; GCN-NEXT: v_mov_b32_e32 v0, s1
266-
; GCN-NEXT: v_mov_b32_e32 v1, s2
267-
; GCN-NEXT: v_bitop3_b32 v0, s0, v0, v1 bitop3:2
268-
; GCN-NEXT: ; return to shader part epilog
324+
; GFX950-SDAG-LABEL: uniform_4_op:
325+
; GFX950-SDAG: ; %bb.0:
326+
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s1
327+
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, s2
328+
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, s0, v0, v1 bitop3:2
329+
; GFX950-SDAG-NEXT: ; return to shader part epilog
330+
;
331+
; GFX950-GISEL-LABEL: uniform_4_op:
332+
; GFX950-GISEL: ; %bb.0:
333+
; GFX950-GISEL-NEXT: s_andn2_b32 s0, s2, s0
334+
; GFX950-GISEL-NEXT: s_andn2_b32 s0, s0, s1
335+
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, s0
336+
; GFX950-GISEL-NEXT: ; return to shader part epilog
269337
%nota = xor i32 %a, -1
270338
%notb = xor i32 %b, -1
271339
%and1 = and i32 %nota, %c

0 commit comments

Comments
 (0)