Skip to content

Commit 9dba603

Browse files
committed
AMDGPU/GlobalISel: Increase max legal size to 1024
There are 1024-bit register classes defined for AGPRs. Additionally, OpenCL defines vectors of up to 16 x i64 (1024 bits), and raising the limit lets the tests that use them legalize. llvm-svn: 373350
1 parent 105e82e commit 9dba603

11 files changed

+453
-94
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ using namespace LegalityPredicates;
4040

4141

4242
static LegalityPredicate isMultiple32(unsigned TypeIdx,
43-
unsigned MaxSize = 512) {
43+
unsigned MaxSize = 1024) {
4444
return [=](const LegalityQuery &Query) {
4545
const LLT Ty = Query.Types[TypeIdx];
4646
const LLT EltTy = Ty.getScalarType();
@@ -115,7 +115,7 @@ static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
115115
};
116116
}
117117

118-
// Any combination of 32 or 64-bit elements up to 512 bits, and multiples of
118+
// Any combination of 32 or 64-bit elements up to 1024 bits, and multiples of
119119
// v2s16.
120120
static LegalityPredicate isRegisterType(unsigned TypeIdx) {
121121
return [=](const LegalityQuery &Query) {
@@ -127,7 +127,7 @@ static LegalityPredicate isRegisterType(unsigned TypeIdx) {
127127
EltSize == 128 || EltSize == 256;
128128
}
129129

130-
return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 512;
130+
return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 1024;
131131
};
132132
}
133133

@@ -162,7 +162,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
162162
const LLT S96 = LLT::scalar(96);
163163
const LLT S128 = LLT::scalar(128);
164164
const LLT S256 = LLT::scalar(256);
165-
const LLT S512 = LLT::scalar(512);
165+
const LLT S1024 = LLT::scalar(1024);
166166

167167
const LLT V2S16 = LLT::vector(2, 16);
168168
const LLT V4S16 = LLT::vector(4, 16);
@@ -293,7 +293,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
293293
.legalFor({S1, S32, S64, S16, V2S32, V4S32, V2S16, V4S16, GlobalPtr,
294294
ConstantPtr, LocalPtr, FlatPtr, PrivatePtr})
295295
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
296-
.clampScalarOrElt(0, S32, S512)
296+
.clampScalarOrElt(0, S32, S1024)
297297
.legalIf(isMultiple32(0))
298298
.widenScalarToNextPow2(0, 32)
299299
.clampMaxNumElements(0, S32, 16);
@@ -884,7 +884,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
884884
return (EltTy.getSizeInBits() == 16 ||
885885
EltTy.getSizeInBits() % 32 == 0) &&
886886
VecTy.getSizeInBits() % 32 == 0 &&
887-
VecTy.getSizeInBits() <= 512 &&
887+
VecTy.getSizeInBits() <= 1024 &&
888888
IdxTy.getSizeInBits() == 32;
889889
})
890890
.clampScalar(EltTypeIdx, S32, S64)
@@ -991,7 +991,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
991991
.fewerElementsIf(
992992
[=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
993993
scalarize(1))
994-
.clampScalar(BigTyIdx, S32, S512)
994+
.clampScalar(BigTyIdx, S32, S1024)
995995
.lowerFor({{S16, V2S16}})
996996
.widenScalarIf(
997997
[=](const LegalityQuery &Query) {
@@ -1022,7 +1022,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
10221022

10231023
return BigTy.getSizeInBits() % 16 == 0 &&
10241024
LitTy.getSizeInBits() % 16 == 0 &&
1025-
BigTy.getSizeInBits() <= 512;
1025+
BigTy.getSizeInBits() <= 1024;
10261026
})
10271027
// Any vectors left are the wrong size. Scalarize them.
10281028
.scalarize(0)

llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77
//===----------------------------------------------------------------------===//
88

99
def SGPRRegBank : RegisterBank<"SGPR",
10-
[SReg_32, SReg_64, SReg_128, SReg_256, SReg_512]
10+
[SReg_32, SReg_64, SReg_128, SReg_256, SReg_512, SReg_1024]
1111
>;
1212

1313
def VGPRRegBank : RegisterBank<"VGPR",
14-
[VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512]
14+
[VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512, VReg_1024]
1515
>;
1616

1717
def SCCRegBank : RegisterBank <"SCC", [SReg_32, SCC_CLASS]>;

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1975,6 +1975,9 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
19751975
case 512:
19761976
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
19771977
&AMDGPU::SReg_512RegClass;
1978+
case 1024:
1979+
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_1024RegClass :
1980+
&AMDGPU::SReg_1024RegClass;
19781981
default:
19791982
if (Size < 32)
19801983
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
Lines changed: 118 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,47 @@
1-
# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
23
---
34
name: extract512
45
legalized: true
56
regBankSelected: true
67

7-
# CHECK-LABEL: extract512
8-
# CHECK: [[BASE:%[0-9]+]]:sreg_512 = IMPLICIT_DEF
9-
# CHECK: [[SGPR0:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub0
10-
# CHECK: [[SGPR1:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub1
11-
# CHECK: [[SGPR2:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub2
12-
# CHECK: [[SGPR3:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub3
13-
# CHECK: [[SGPR4:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub4
14-
# CHECK: [[SGPR5:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub5
15-
# CHECK: [[SGPR6:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub6
16-
# CHECK: [[SGPR7:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub7
17-
# CHECK: [[SGPR8:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub8
18-
# CHECK: [[SGPR9:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub9
19-
# CHECK: [[SGPR10:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub10
20-
# CHECK: [[SGPR11:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub11
21-
# CHECK: [[SGPR12:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub12
22-
# CHECK: [[SGPR13:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub13
23-
# CHECK: [[SGPR14:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub14
24-
# CHECK: [[SGPR15:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub15
25-
# CHECK: $sgpr0 = COPY [[SGPR0]]
26-
# CHECK: $sgpr1 = COPY [[SGPR1]]
27-
# CHECK: $sgpr2 = COPY [[SGPR2]]
28-
# CHECK: $sgpr3 = COPY [[SGPR3]]
29-
# CHECK: $sgpr4 = COPY [[SGPR4]]
30-
# CHECK: $sgpr5 = COPY [[SGPR5]]
31-
# CHECK: $sgpr6 = COPY [[SGPR6]]
32-
# CHECK: $sgpr7 = COPY [[SGPR7]]
33-
# CHECK: $sgpr8 = COPY [[SGPR8]]
34-
# CHECK: $sgpr9 = COPY [[SGPR9]]
35-
# CHECK: $sgpr10 = COPY [[SGPR10]]
36-
# CHECK: $sgpr11 = COPY [[SGPR11]]
37-
# CHECK: $sgpr12 = COPY [[SGPR12]]
38-
# CHECK: $sgpr13 = COPY [[SGPR13]]
39-
# CHECK: $sgpr14 = COPY [[SGPR14]]
40-
# CHECK: $sgpr15 = COPY [[SGPR15]]
41-
428
body: |
439
bb.0:
10+
; CHECK-LABEL: name: extract512
11+
; CHECK: [[DEF:%[0-9]+]]:sreg_512 = IMPLICIT_DEF
12+
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub0
13+
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub1
14+
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub2
15+
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub3
16+
; CHECK: [[COPY4:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub4
17+
; CHECK: [[COPY5:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub5
18+
; CHECK: [[COPY6:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub6
19+
; CHECK: [[COPY7:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub7
20+
; CHECK: [[COPY8:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub8
21+
; CHECK: [[COPY9:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub9
22+
; CHECK: [[COPY10:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub10
23+
; CHECK: [[COPY11:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub11
24+
; CHECK: [[COPY12:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub12
25+
; CHECK: [[COPY13:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub13
26+
; CHECK: [[COPY14:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub14
27+
; CHECK: [[COPY15:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub15
28+
; CHECK: $sgpr0 = COPY [[COPY]]
29+
; CHECK: $sgpr1 = COPY [[COPY1]]
30+
; CHECK: $sgpr2 = COPY [[COPY2]]
31+
; CHECK: $sgpr3 = COPY [[COPY3]]
32+
; CHECK: $sgpr4 = COPY [[COPY4]]
33+
; CHECK: $sgpr5 = COPY [[COPY5]]
34+
; CHECK: $sgpr6 = COPY [[COPY6]]
35+
; CHECK: $sgpr7 = COPY [[COPY7]]
36+
; CHECK: $sgpr8 = COPY [[COPY8]]
37+
; CHECK: $sgpr9 = COPY [[COPY9]]
38+
; CHECK: $sgpr10 = COPY [[COPY10]]
39+
; CHECK: $sgpr11 = COPY [[COPY11]]
40+
; CHECK: $sgpr12 = COPY [[COPY12]]
41+
; CHECK: $sgpr13 = COPY [[COPY13]]
42+
; CHECK: $sgpr14 = COPY [[COPY14]]
43+
; CHECK: $sgpr15 = COPY [[COPY15]]
44+
; CHECK: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15
4445
%0:sgpr(s512) = G_IMPLICIT_DEF
4546
%1:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 0
4647
%2:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 32
@@ -75,3 +76,84 @@ body: |
7576
$sgpr14 = COPY %15:sgpr(s32)
7677
$sgpr15 = COPY %16:sgpr(s32)
7778
SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15
79+
...
80+
81+
---
82+
name: extract_s_s32_s1024
83+
legalized: true
84+
regBankSelected: true
85+
86+
body: |
87+
bb.0:
88+
; CHECK-LABEL: name: extract_s_s32_s1024
89+
; CHECK: [[DEF:%[0-9]+]]:sreg_1024 = IMPLICIT_DEF
90+
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub0
91+
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub1
92+
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub2
93+
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub3
94+
; CHECK: [[COPY4:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub4
95+
; CHECK: [[COPY5:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub5
96+
; CHECK: [[COPY6:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub6
97+
; CHECK: [[COPY7:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub7
98+
; CHECK: [[COPY8:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub8
99+
; CHECK: [[COPY9:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub9
100+
; CHECK: [[COPY10:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub10
101+
; CHECK: [[COPY11:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub11
102+
; CHECK: [[COPY12:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub12
103+
; CHECK: [[COPY13:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub13
104+
; CHECK: [[COPY14:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub14
105+
; CHECK: [[COPY15:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub15
106+
; CHECK: [[COPY16:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub16
107+
; CHECK: [[COPY17:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub17
108+
; CHECK: [[COPY18:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub18
109+
; CHECK: [[COPY19:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub19
110+
; CHECK: [[COPY20:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub20
111+
; CHECK: [[COPY21:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub21
112+
; CHECK: [[COPY22:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub22
113+
; CHECK: [[COPY23:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub23
114+
; CHECK: [[COPY24:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub24
115+
; CHECK: [[COPY25:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub25
116+
; CHECK: [[COPY26:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub26
117+
; CHECK: [[COPY27:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub27
118+
; CHECK: [[COPY28:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub28
119+
; CHECK: [[COPY29:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub29
120+
; CHECK: [[COPY30:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub30
121+
; CHECK: [[COPY31:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub31
122+
; CHECK: S_ENDPGM 0, implicit [[DEF]], implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]], implicit [[COPY10]], implicit [[COPY11]], implicit [[COPY12]], implicit [[COPY13]], implicit [[COPY14]], implicit [[COPY15]], implicit [[COPY16]], implicit [[COPY17]], implicit [[COPY18]], implicit [[COPY19]], implicit [[COPY20]], implicit [[COPY21]], implicit [[COPY22]], implicit [[COPY23]], implicit [[COPY24]], implicit [[COPY25]], implicit [[COPY26]], implicit [[COPY27]], implicit [[COPY28]], implicit [[COPY29]], implicit [[COPY30]], implicit [[COPY31]]
123+
%0:sgpr(s1024) = G_IMPLICIT_DEF
124+
%1:sgpr(s32) = G_EXTRACT %0:sgpr, 0
125+
%2:sgpr(s32) = G_EXTRACT %0:sgpr, 32
126+
%3:sgpr(s32) = G_EXTRACT %0:sgpr, 64
127+
%4:sgpr(s32) = G_EXTRACT %0:sgpr, 96
128+
%5:sgpr(s32) = G_EXTRACT %0:sgpr, 128
129+
%6:sgpr(s32) = G_EXTRACT %0:sgpr, 160
130+
%7:sgpr(s32) = G_EXTRACT %0:sgpr, 192
131+
%8:sgpr(s32) = G_EXTRACT %0:sgpr, 224
132+
%9:sgpr(s32) = G_EXTRACT %0:sgpr, 256
133+
%10:sgpr(s32) = G_EXTRACT %0:sgpr, 288
134+
%11:sgpr(s32) = G_EXTRACT %0:sgpr, 320
135+
%12:sgpr(s32) = G_EXTRACT %0:sgpr, 352
136+
%13:sgpr(s32) = G_EXTRACT %0:sgpr, 384
137+
%14:sgpr(s32) = G_EXTRACT %0:sgpr, 416
138+
%15:sgpr(s32) = G_EXTRACT %0:sgpr, 448
139+
%16:sgpr(s32) = G_EXTRACT %0:sgpr, 480
140+
141+
%17:sgpr(s32) = G_EXTRACT %0:sgpr, 512
142+
%18:sgpr(s32) = G_EXTRACT %0:sgpr, 544
143+
%19:sgpr(s32) = G_EXTRACT %0:sgpr, 576
144+
%20:sgpr(s32) = G_EXTRACT %0:sgpr, 608
145+
%21:sgpr(s32) = G_EXTRACT %0:sgpr, 640
146+
%22:sgpr(s32) = G_EXTRACT %0:sgpr, 672
147+
%23:sgpr(s32) = G_EXTRACT %0:sgpr, 704
148+
%24:sgpr(s32) = G_EXTRACT %0:sgpr, 736
149+
%25:sgpr(s32) = G_EXTRACT %0:sgpr, 768
150+
%26:sgpr(s32) = G_EXTRACT %0:sgpr, 800
151+
%27:sgpr(s32) = G_EXTRACT %0:sgpr, 832
152+
%28:sgpr(s32) = G_EXTRACT %0:sgpr, 864
153+
%29:sgpr(s32) = G_EXTRACT %0:sgpr, 896
154+
%30:sgpr(s32) = G_EXTRACT %0:sgpr, 928
155+
%31:sgpr(s32) = G_EXTRACT %0:sgpr, 960
156+
%32:sgpr(s32) = G_EXTRACT %0:sgpr, 992
157+
158+
S_ENDPGM 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31, implicit %32
159+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ body: |
121121
; GCN-LABEL: name: implicit_def_p3_vgpr
122122
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
123123
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
124+
; GCN: $m0 = S_MOV_B32 -1
124125
; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
125126
%0:vgpr(p3) = G_IMPLICIT_DEF
126127
%1:vgpr(s32) = G_CONSTANT i32 4
@@ -203,3 +204,32 @@ body: |
203204
%0:vcc(s1) = G_IMPLICIT_DEF
204205
S_ENDPGM 0, implicit %0
205206
...
207+
208+
---
209+
210+
name: implicit_def_s1024_sgpr
211+
legalized: true
212+
regBankSelected: true
213+
214+
body: |
215+
bb.0:
216+
; GCN-LABEL: name: implicit_def_s1024_sgpr
217+
; GCN: [[DEF:%[0-9]+]]:sreg_1024 = IMPLICIT_DEF
218+
; GCN: S_ENDPGM 0, implicit [[DEF]]
219+
%0:sgpr(s1024) = G_IMPLICIT_DEF
220+
S_ENDPGM 0, implicit %0
221+
...
222+
---
223+
224+
name: implicit_def_s1024_vgpr
225+
legalized: true
226+
regBankSelected: true
227+
228+
body: |
229+
bb.0:
230+
; GCN-LABEL: name: implicit_def_s1024_vgpr
231+
; GCN: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
232+
; GCN: S_ENDPGM 0, implicit [[DEF]]
233+
%0:vgpr(s1024) = G_IMPLICIT_DEF
234+
S_ENDPGM 0, implicit %0
235+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -563,3 +563,29 @@ body: |
563563
%2:vreg_64(s64) = G_MERGE_VALUES %0, %1
564564
S_ENDPGM 0, implicit %2
565565
...
566+
567+
---
568+
name: test_merge_values_s_s1024_s_s256_s_s256_s_s256_s_s256
569+
legalized: true
570+
regBankSelected: true
571+
tracksRegLiveness: true
572+
573+
body: |
574+
bb.0:
575+
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
576+
577+
; GCN-LABEL: name: test_merge_values_s_s1024_s_s256_s_s256_s_s256_s_s256
578+
; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
579+
; GCN: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
580+
; GCN: [[DEF:%[0-9]+]]:sreg_256 = IMPLICIT_DEF
581+
; GCN: [[COPY1:%[0-9]+]]:sreg_256 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
582+
; GCN: [[DEF1:%[0-9]+]]:sreg_256 = IMPLICIT_DEF
583+
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_1024 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, [[COPY1]], %subreg.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23, [[DEF1]], %subreg.sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
584+
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
585+
%0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7,
586+
%1:sgpr(s256) = G_IMPLICIT_DEF
587+
%2:sgpr(s256) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
588+
%3:sgpr(s256) = G_IMPLICIT_DEF
589+
%4:sgpr(s1024) = G_MERGE_VALUES %0, %1, %2, %3
590+
S_ENDPGM 0, implicit %4
591+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,11 @@ body: |
183183
bb.0:
184184
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
185185
186+
; GCN-LABEL: name: test_unmerge_values_s_s64_s_s64_s64_s_s192
187+
; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
188+
; GCN: [[DEF:%[0-9]+]]:sgpr(s192) = G_IMPLICIT_DEF
189+
; GCN: [[UV:%[0-9]+]]:sgpr(s64), [[UV1:%[0-9]+]]:sgpr(s64), [[UV2:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[DEF]](s192)
190+
; GCN: S_ENDPGM 0, implicit [[UV]](s64), implicit [[UV1]](s64), implicit [[UV2]](s64)
186191
%0:sgpr(s192) = G_IMPLICIT_DEF
187192
%1:sgpr(s64), %2:sgpr(s64), %3:sgpr(s64) = G_UNMERGE_VALUES %0
188193
S_ENDPGM 0, implicit %1, implicit %2, implicit %3
@@ -229,3 +234,26 @@ body: |
229234
%1:vgpr(s32), %2:vgpr(s32) = G_UNMERGE_VALUES %0
230235
S_ENDPGM 0, implicit %1, implicit %2
231236
...
237+
238+
---
239+
name: test_unmerge_values_s_s256_s_s1024
240+
legalized: true
241+
regBankSelected: true
242+
tracksRegLiveness: true
243+
244+
body: |
245+
bb.0:
246+
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
247+
248+
; GCN-LABEL: name: test_unmerge_values_s_s256_s_s1024
249+
; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
250+
; GCN: [[DEF:%[0-9]+]]:sreg_1024 = IMPLICIT_DEF
251+
; GCN: [[COPY:%[0-9]+]]:sreg_256 = COPY [[DEF]].sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7
252+
; GCN: [[COPY1:%[0-9]+]]:sreg_256 = COPY [[DEF]].sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
253+
; GCN: [[COPY2:%[0-9]+]]:sreg_256 = COPY [[DEF]].sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23
254+
; GCN: [[COPY3:%[0-9]+]]:sreg_256 = COPY [[DEF]].sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
255+
; GCN: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]]
256+
%0:sgpr(s1024) = G_IMPLICIT_DEF
257+
%1:sgpr(s256), %2:sgpr(s256), %3:sgpr(s256), %4:sgpr(s256) = G_UNMERGE_VALUES %0
258+
S_ENDPGM 0, implicit %1, implicit %2, implicit %3, implicit %4
259+
...

0 commit comments

Comments
 (0)