Skip to content

Commit a10d36d

Browse files
committed
[GlobalISel][AMDGPU] Fix handling of v2i128 type for AND, OR, XOR
Change-Id: I709d434e111f61e867c4fc284f1f4e768a083015
1 parent c275fdc commit a10d36d

File tree

4 files changed

+365
-1
lines changed

4 files changed

+365
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,18 @@ static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) {
119119
};
120120
}
121121

122+
// Re-express the vector type at TypeIdx as a vector of 64-bit elements when
// the current element width is a multiple of 64 bits, and as a vector of
// 32-bit elements otherwise. The new element count is the total bit width of
// the original type divided by the chosen element width, rounded up.
static LegalizeMutation breakCurrentEltsToSize32Or64(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT VecTy = Query.Types[TypeIdx];
    const int TotalBits = VecTy.getSizeInBits();
    const int OldEltBits = VecTy.getElementType().getSizeInBits();

    // Prefer 64-bit pieces only when they tile the existing elements exactly.
    unsigned NewEltBits = 32;
    if (OldEltBits % 64 == 0)
      NewEltBits = 64;

    // Ceiling division of the total width by the new element width.
    // NOTE(review): when TotalBits is not a multiple of NewEltBits the result
    // is wider than the input type; presumably callers only reach this with
    // sizes that divide evenly -- confirm against the rule predicates.
    const unsigned NumPieces = (TotalBits + (NewEltBits - 1)) / NewEltBits;
    return std::pair(TypeIdx, LLT::fixed_vector(NumPieces, NewEltBits));
  };
}
133+
122134
// Increase the number of vector elements to reach the next multiple of 32-bit
123135
// type.
124136
static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) {
@@ -875,7 +887,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
875887
.legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
876888
.clampScalar(0, S32, S64)
877889
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
878-
.fewerElementsIf(vectorWiderThan(0, 64), fewerEltsToSize64Vector(0))
890+
.fewerElementsIf(all(vectorWiderThan(0, 64), scalarOrEltNarrowerThan(0, 64)), fewerEltsToSize64Vector(0))
891+
.bitcastIf(all(vectorWiderThan(0, 64), scalarOrEltWiderThan(0, 64)), breakCurrentEltsToSize32Or64(0))
879892
.widenScalarToNextPow2(0)
880893
.scalarize(0);
881894

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
3+
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
4+
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
5+
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6+
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10 %s
7+
8+
define <2 x i128> @v_and_v2i128(<2 x i128> %a, <2 x i128> %b) {
9+
; GFX7-LABEL: v_and_v2i128:
10+
; GFX7: ; %bb.0:
11+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12+
; GFX7-NEXT: v_and_b32_e32 v0, v0, v8
13+
; GFX7-NEXT: v_and_b32_e32 v1, v1, v9
14+
; GFX7-NEXT: v_and_b32_e32 v2, v2, v10
15+
; GFX7-NEXT: v_and_b32_e32 v3, v3, v11
16+
; GFX7-NEXT: v_and_b32_e32 v4, v4, v12
17+
; GFX7-NEXT: v_and_b32_e32 v5, v5, v13
18+
; GFX7-NEXT: v_and_b32_e32 v6, v6, v14
19+
; GFX7-NEXT: v_and_b32_e32 v7, v7, v15
20+
; GFX7-NEXT: s_setpc_b64 s[30:31]
21+
;
22+
; GFX9-LABEL: v_and_v2i128:
23+
; GFX9: ; %bb.0:
24+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25+
; GFX9-NEXT: v_and_b32_e32 v0, v0, v8
26+
; GFX9-NEXT: v_and_b32_e32 v1, v1, v9
27+
; GFX9-NEXT: v_and_b32_e32 v2, v2, v10
28+
; GFX9-NEXT: v_and_b32_e32 v3, v3, v11
29+
; GFX9-NEXT: v_and_b32_e32 v4, v4, v12
30+
; GFX9-NEXT: v_and_b32_e32 v5, v5, v13
31+
; GFX9-NEXT: v_and_b32_e32 v6, v6, v14
32+
; GFX9-NEXT: v_and_b32_e32 v7, v7, v15
33+
; GFX9-NEXT: s_setpc_b64 s[30:31]
34+
;
35+
; GFX8-LABEL: v_and_v2i128:
36+
; GFX8: ; %bb.0:
37+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38+
; GFX8-NEXT: v_and_b32_e32 v0, v0, v8
39+
; GFX8-NEXT: v_and_b32_e32 v1, v1, v9
40+
; GFX8-NEXT: v_and_b32_e32 v2, v2, v10
41+
; GFX8-NEXT: v_and_b32_e32 v3, v3, v11
42+
; GFX8-NEXT: v_and_b32_e32 v4, v4, v12
43+
; GFX8-NEXT: v_and_b32_e32 v5, v5, v13
44+
; GFX8-NEXT: v_and_b32_e32 v6, v6, v14
45+
; GFX8-NEXT: v_and_b32_e32 v7, v7, v15
46+
; GFX8-NEXT: s_setpc_b64 s[30:31]
47+
;
48+
; GFX10-LABEL: v_and_v2i128:
49+
; GFX10: ; %bb.0:
50+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51+
; GFX10-NEXT: v_and_b32_e32 v0, v0, v8
52+
; GFX10-NEXT: v_and_b32_e32 v1, v1, v9
53+
; GFX10-NEXT: v_and_b32_e32 v2, v2, v10
54+
; GFX10-NEXT: v_and_b32_e32 v3, v3, v11
55+
; GFX10-NEXT: v_and_b32_e32 v4, v4, v12
56+
; GFX10-NEXT: v_and_b32_e32 v5, v5, v13
57+
; GFX10-NEXT: v_and_b32_e32 v6, v6, v14
58+
; GFX10-NEXT: v_and_b32_e32 v7, v7, v15
59+
; GFX10-NEXT: s_setpc_b64 s[30:31]
60+
%and = and <2 x i128> %a, %b
61+
ret <2 x i128> %and
62+
}
63+
64+
define <2 x i128> @v_and_v2i128_inline_imm(<2 x i128> %a) {
65+
; GFX7-LABEL: v_and_v2i128_inline_imm:
66+
; GFX7: ; %bb.0:
67+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68+
; GFX7-NEXT: s_mov_b64 s[4:5], 64
69+
; GFX7-NEXT: s_mov_b64 s[6:7], 0
70+
; GFX7-NEXT: s_mov_b64 s[4:5], s[4:5]
71+
; GFX7-NEXT: s_mov_b64 s[6:7], s[6:7]
72+
; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
73+
; GFX7-NEXT: v_and_b32_e32 v1, s5, v1
74+
; GFX7-NEXT: v_and_b32_e32 v2, s6, v2
75+
; GFX7-NEXT: v_and_b32_e32 v3, s7, v3
76+
; GFX7-NEXT: v_and_b32_e32 v4, s4, v4
77+
; GFX7-NEXT: v_and_b32_e32 v5, s5, v5
78+
; GFX7-NEXT: v_and_b32_e32 v6, s6, v6
79+
; GFX7-NEXT: v_and_b32_e32 v7, s7, v7
80+
; GFX7-NEXT: s_setpc_b64 s[30:31]
81+
;
82+
; GFX9-LABEL: v_and_v2i128_inline_imm:
83+
; GFX9: ; %bb.0:
84+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85+
; GFX9-NEXT: s_mov_b64 s[4:5], 64
86+
; GFX9-NEXT: s_mov_b64 s[6:7], 0
87+
; GFX9-NEXT: s_mov_b64 s[4:5], s[4:5]
88+
; GFX9-NEXT: s_mov_b64 s[6:7], s[6:7]
89+
; GFX9-NEXT: v_and_b32_e32 v0, s4, v0
90+
; GFX9-NEXT: v_and_b32_e32 v1, s5, v1
91+
; GFX9-NEXT: v_and_b32_e32 v2, s6, v2
92+
; GFX9-NEXT: v_and_b32_e32 v3, s7, v3
93+
; GFX9-NEXT: v_and_b32_e32 v4, s4, v4
94+
; GFX9-NEXT: v_and_b32_e32 v5, s5, v5
95+
; GFX9-NEXT: v_and_b32_e32 v6, s6, v6
96+
; GFX9-NEXT: v_and_b32_e32 v7, s7, v7
97+
; GFX9-NEXT: s_setpc_b64 s[30:31]
98+
;
99+
; GFX8-LABEL: v_and_v2i128_inline_imm:
100+
; GFX8: ; %bb.0:
101+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102+
; GFX8-NEXT: s_mov_b64 s[4:5], 64
103+
; GFX8-NEXT: s_mov_b64 s[6:7], 0
104+
; GFX8-NEXT: s_mov_b64 s[4:5], s[4:5]
105+
; GFX8-NEXT: s_mov_b64 s[6:7], s[6:7]
106+
; GFX8-NEXT: v_and_b32_e32 v0, s4, v0
107+
; GFX8-NEXT: v_and_b32_e32 v1, s5, v1
108+
; GFX8-NEXT: v_and_b32_e32 v2, s6, v2
109+
; GFX8-NEXT: v_and_b32_e32 v3, s7, v3
110+
; GFX8-NEXT: v_and_b32_e32 v4, s4, v4
111+
; GFX8-NEXT: v_and_b32_e32 v5, s5, v5
112+
; GFX8-NEXT: v_and_b32_e32 v6, s6, v6
113+
; GFX8-NEXT: v_and_b32_e32 v7, s7, v7
114+
; GFX8-NEXT: s_setpc_b64 s[30:31]
115+
%and = and <2 x i128> %a, <i128 64, i128 64>
116+
ret <2 x i128> %and
117+
}
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
3+
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
4+
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
5+
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6+
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10 %s
7+
8+
define <2 x i128> @v_or_v2i128(<2 x i128> %a, <2 x i128> %b) {
9+
; GFX7-LABEL: v_or_v2i128:
10+
; GFX7: ; %bb.0:
11+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12+
; GFX7-NEXT: v_or_b32_e32 v0, v0, v8
13+
; GFX7-NEXT: v_or_b32_e32 v1, v1, v9
14+
; GFX7-NEXT: v_or_b32_e32 v2, v2, v10
15+
; GFX7-NEXT: v_or_b32_e32 v3, v3, v11
16+
; GFX7-NEXT: v_or_b32_e32 v4, v4, v12
17+
; GFX7-NEXT: v_or_b32_e32 v5, v5, v13
18+
; GFX7-NEXT: v_or_b32_e32 v6, v6, v14
19+
; GFX7-NEXT: v_or_b32_e32 v7, v7, v15
20+
; GFX7-NEXT: s_setpc_b64 s[30:31]
21+
;
22+
; GFX9-LABEL: v_or_v2i128:
23+
; GFX9: ; %bb.0:
24+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25+
; GFX9-NEXT: v_or_b32_e32 v0, v0, v8
26+
; GFX9-NEXT: v_or_b32_e32 v1, v1, v9
27+
; GFX9-NEXT: v_or_b32_e32 v2, v2, v10
28+
; GFX9-NEXT: v_or_b32_e32 v3, v3, v11
29+
; GFX9-NEXT: v_or_b32_e32 v4, v4, v12
30+
; GFX9-NEXT: v_or_b32_e32 v5, v5, v13
31+
; GFX9-NEXT: v_or_b32_e32 v6, v6, v14
32+
; GFX9-NEXT: v_or_b32_e32 v7, v7, v15
33+
; GFX9-NEXT: s_setpc_b64 s[30:31]
34+
;
35+
; GFX8-LABEL: v_or_v2i128:
36+
; GFX8: ; %bb.0:
37+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38+
; GFX8-NEXT: v_or_b32_e32 v0, v0, v8
39+
; GFX8-NEXT: v_or_b32_e32 v1, v1, v9
40+
; GFX8-NEXT: v_or_b32_e32 v2, v2, v10
41+
; GFX8-NEXT: v_or_b32_e32 v3, v3, v11
42+
; GFX8-NEXT: v_or_b32_e32 v4, v4, v12
43+
; GFX8-NEXT: v_or_b32_e32 v5, v5, v13
44+
; GFX8-NEXT: v_or_b32_e32 v6, v6, v14
45+
; GFX8-NEXT: v_or_b32_e32 v7, v7, v15
46+
; GFX8-NEXT: s_setpc_b64 s[30:31]
47+
;
48+
; GFX10-LABEL: v_or_v2i128:
49+
; GFX10: ; %bb.0:
50+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51+
; GFX10-NEXT: v_or_b32_e32 v0, v0, v8
52+
; GFX10-NEXT: v_or_b32_e32 v1, v1, v9
53+
; GFX10-NEXT: v_or_b32_e32 v2, v2, v10
54+
; GFX10-NEXT: v_or_b32_e32 v3, v3, v11
55+
; GFX10-NEXT: v_or_b32_e32 v4, v4, v12
56+
; GFX10-NEXT: v_or_b32_e32 v5, v5, v13
57+
; GFX10-NEXT: v_or_b32_e32 v6, v6, v14
58+
; GFX10-NEXT: v_or_b32_e32 v7, v7, v15
59+
; GFX10-NEXT: s_setpc_b64 s[30:31]
60+
%or = or <2 x i128> %a, %b
61+
ret <2 x i128> %or
62+
}
63+
64+
define <2 x i128> @v_or_v2i128_inline_imm(<2 x i128> %a) {
65+
; GFX7-LABEL: v_or_v2i128_inline_imm:
66+
; GFX7: ; %bb.0:
67+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68+
; GFX7-NEXT: s_mov_b64 s[4:5], 64
69+
; GFX7-NEXT: s_mov_b64 s[6:7], 0
70+
; GFX7-NEXT: s_mov_b64 s[4:5], s[4:5]
71+
; GFX7-NEXT: s_mov_b64 s[6:7], s[6:7]
72+
; GFX7-NEXT: v_or_b32_e32 v0, s4, v0
73+
; GFX7-NEXT: v_or_b32_e32 v1, s5, v1
74+
; GFX7-NEXT: v_or_b32_e32 v2, s6, v2
75+
; GFX7-NEXT: v_or_b32_e32 v3, s7, v3
76+
; GFX7-NEXT: v_or_b32_e32 v4, s4, v4
77+
; GFX7-NEXT: v_or_b32_e32 v5, s5, v5
78+
; GFX7-NEXT: v_or_b32_e32 v6, s6, v6
79+
; GFX7-NEXT: v_or_b32_e32 v7, s7, v7
80+
; GFX7-NEXT: s_setpc_b64 s[30:31]
81+
;
82+
; GFX9-LABEL: v_or_v2i128_inline_imm:
83+
; GFX9: ; %bb.0:
84+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85+
; GFX9-NEXT: s_mov_b64 s[4:5], 64
86+
; GFX9-NEXT: s_mov_b64 s[6:7], 0
87+
; GFX9-NEXT: s_mov_b64 s[4:5], s[4:5]
88+
; GFX9-NEXT: s_mov_b64 s[6:7], s[6:7]
89+
; GFX9-NEXT: v_or_b32_e32 v0, s4, v0
90+
; GFX9-NEXT: v_or_b32_e32 v1, s5, v1
91+
; GFX9-NEXT: v_or_b32_e32 v2, s6, v2
92+
; GFX9-NEXT: v_or_b32_e32 v3, s7, v3
93+
; GFX9-NEXT: v_or_b32_e32 v4, s4, v4
94+
; GFX9-NEXT: v_or_b32_e32 v5, s5, v5
95+
; GFX9-NEXT: v_or_b32_e32 v6, s6, v6
96+
; GFX9-NEXT: v_or_b32_e32 v7, s7, v7
97+
; GFX9-NEXT: s_setpc_b64 s[30:31]
98+
;
99+
; GFX8-LABEL: v_or_v2i128_inline_imm:
100+
; GFX8: ; %bb.0:
101+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102+
; GFX8-NEXT: s_mov_b64 s[4:5], 64
103+
; GFX8-NEXT: s_mov_b64 s[6:7], 0
104+
; GFX8-NEXT: s_mov_b64 s[4:5], s[4:5]
105+
; GFX8-NEXT: s_mov_b64 s[6:7], s[6:7]
106+
; GFX8-NEXT: v_or_b32_e32 v0, s4, v0
107+
; GFX8-NEXT: v_or_b32_e32 v1, s5, v1
108+
; GFX8-NEXT: v_or_b32_e32 v2, s6, v2
109+
; GFX8-NEXT: v_or_b32_e32 v3, s7, v3
110+
; GFX8-NEXT: v_or_b32_e32 v4, s4, v4
111+
; GFX8-NEXT: v_or_b32_e32 v5, s5, v5
112+
; GFX8-NEXT: v_or_b32_e32 v6, s6, v6
113+
; GFX8-NEXT: v_or_b32_e32 v7, s7, v7
114+
; GFX8-NEXT: s_setpc_b64 s[30:31]
115+
%or = or <2 x i128> %a, <i128 64, i128 64>
116+
ret <2 x i128> %or
117+
}
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
3+
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
4+
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
5+
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6+
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10 %s
7+
8+
define <2 x i128> @v_xor_v2i128(<2 x i128> %a, <2 x i128> %b) {
9+
; GFX7-LABEL: v_xor_v2i128:
10+
; GFX7: ; %bb.0:
11+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12+
; GFX7-NEXT: v_xor_b32_e32 v0, v0, v8
13+
; GFX7-NEXT: v_xor_b32_e32 v1, v1, v9
14+
; GFX7-NEXT: v_xor_b32_e32 v2, v2, v10
15+
; GFX7-NEXT: v_xor_b32_e32 v3, v3, v11
16+
; GFX7-NEXT: v_xor_b32_e32 v4, v4, v12
17+
; GFX7-NEXT: v_xor_b32_e32 v5, v5, v13
18+
; GFX7-NEXT: v_xor_b32_e32 v6, v6, v14
19+
; GFX7-NEXT: v_xor_b32_e32 v7, v7, v15
20+
; GFX7-NEXT: s_setpc_b64 s[30:31]
21+
;
22+
; GFX9-LABEL: v_xor_v2i128:
23+
; GFX9: ; %bb.0:
24+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25+
; GFX9-NEXT: v_xor_b32_e32 v0, v0, v8
26+
; GFX9-NEXT: v_xor_b32_e32 v1, v1, v9
27+
; GFX9-NEXT: v_xor_b32_e32 v2, v2, v10
28+
; GFX9-NEXT: v_xor_b32_e32 v3, v3, v11
29+
; GFX9-NEXT: v_xor_b32_e32 v4, v4, v12
30+
; GFX9-NEXT: v_xor_b32_e32 v5, v5, v13
31+
; GFX9-NEXT: v_xor_b32_e32 v6, v6, v14
32+
; GFX9-NEXT: v_xor_b32_e32 v7, v7, v15
33+
; GFX9-NEXT: s_setpc_b64 s[30:31]
34+
;
35+
; GFX8-LABEL: v_xor_v2i128:
36+
; GFX8: ; %bb.0:
37+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38+
; GFX8-NEXT: v_xor_b32_e32 v0, v0, v8
39+
; GFX8-NEXT: v_xor_b32_e32 v1, v1, v9
40+
; GFX8-NEXT: v_xor_b32_e32 v2, v2, v10
41+
; GFX8-NEXT: v_xor_b32_e32 v3, v3, v11
42+
; GFX8-NEXT: v_xor_b32_e32 v4, v4, v12
43+
; GFX8-NEXT: v_xor_b32_e32 v5, v5, v13
44+
; GFX8-NEXT: v_xor_b32_e32 v6, v6, v14
45+
; GFX8-NEXT: v_xor_b32_e32 v7, v7, v15
46+
; GFX8-NEXT: s_setpc_b64 s[30:31]
47+
;
48+
; GFX10-LABEL: v_xor_v2i128:
49+
; GFX10: ; %bb.0:
50+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51+
; GFX10-NEXT: v_xor_b32_e32 v0, v0, v8
52+
; GFX10-NEXT: v_xor_b32_e32 v1, v1, v9
53+
; GFX10-NEXT: v_xor_b32_e32 v2, v2, v10
54+
; GFX10-NEXT: v_xor_b32_e32 v3, v3, v11
55+
; GFX10-NEXT: v_xor_b32_e32 v4, v4, v12
56+
; GFX10-NEXT: v_xor_b32_e32 v5, v5, v13
57+
; GFX10-NEXT: v_xor_b32_e32 v6, v6, v14
58+
; GFX10-NEXT: v_xor_b32_e32 v7, v7, v15
59+
; GFX10-NEXT: s_setpc_b64 s[30:31]
60+
%xor = xor <2 x i128> %a, %b
61+
ret <2 x i128> %xor
62+
}
63+
64+
define <2 x i128> @v_xor_v2i128_inline_imm(<2 x i128> %a) {
65+
; GFX7-LABEL: v_xor_v2i128_inline_imm:
66+
; GFX7: ; %bb.0:
67+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68+
; GFX7-NEXT: s_mov_b64 s[4:5], 64
69+
; GFX7-NEXT: s_mov_b64 s[6:7], 0
70+
; GFX7-NEXT: s_mov_b64 s[4:5], s[4:5]
71+
; GFX7-NEXT: s_mov_b64 s[6:7], s[6:7]
72+
; GFX7-NEXT: v_xor_b32_e32 v0, s4, v0
73+
; GFX7-NEXT: v_xor_b32_e32 v1, s5, v1
74+
; GFX7-NEXT: v_xor_b32_e32 v2, s6, v2
75+
; GFX7-NEXT: v_xor_b32_e32 v3, s7, v3
76+
; GFX7-NEXT: v_xor_b32_e32 v4, s4, v4
77+
; GFX7-NEXT: v_xor_b32_e32 v5, s5, v5
78+
; GFX7-NEXT: v_xor_b32_e32 v6, s6, v6
79+
; GFX7-NEXT: v_xor_b32_e32 v7, s7, v7
80+
; GFX7-NEXT: s_setpc_b64 s[30:31]
81+
;
82+
; GFX9-LABEL: v_xor_v2i128_inline_imm:
83+
; GFX9: ; %bb.0:
84+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85+
; GFX9-NEXT: s_mov_b64 s[4:5], 64
86+
; GFX9-NEXT: s_mov_b64 s[6:7], 0
87+
; GFX9-NEXT: s_mov_b64 s[4:5], s[4:5]
88+
; GFX9-NEXT: s_mov_b64 s[6:7], s[6:7]
89+
; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0
90+
; GFX9-NEXT: v_xor_b32_e32 v1, s5, v1
91+
; GFX9-NEXT: v_xor_b32_e32 v2, s6, v2
92+
; GFX9-NEXT: v_xor_b32_e32 v3, s7, v3
93+
; GFX9-NEXT: v_xor_b32_e32 v4, s4, v4
94+
; GFX9-NEXT: v_xor_b32_e32 v5, s5, v5
95+
; GFX9-NEXT: v_xor_b32_e32 v6, s6, v6
96+
; GFX9-NEXT: v_xor_b32_e32 v7, s7, v7
97+
; GFX9-NEXT: s_setpc_b64 s[30:31]
98+
;
99+
; GFX8-LABEL: v_xor_v2i128_inline_imm:
100+
; GFX8: ; %bb.0:
101+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102+
; GFX8-NEXT: s_mov_b64 s[4:5], 64
103+
; GFX8-NEXT: s_mov_b64 s[6:7], 0
104+
; GFX8-NEXT: s_mov_b64 s[4:5], s[4:5]
105+
; GFX8-NEXT: s_mov_b64 s[6:7], s[6:7]
106+
; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
107+
; GFX8-NEXT: v_xor_b32_e32 v1, s5, v1
108+
; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2
109+
; GFX8-NEXT: v_xor_b32_e32 v3, s7, v3
110+
; GFX8-NEXT: v_xor_b32_e32 v4, s4, v4
111+
; GFX8-NEXT: v_xor_b32_e32 v5, s5, v5
112+
; GFX8-NEXT: v_xor_b32_e32 v6, s6, v6
113+
; GFX8-NEXT: v_xor_b32_e32 v7, s7, v7
114+
; GFX8-NEXT: s_setpc_b64 s[30:31]
115+
%xor = xor <2 x i128> %a, <i128 64, i128 64>
116+
ret <2 x i128> %xor
117+
}

0 commit comments

Comments
 (0)