Skip to content

Commit a1260b5

Browse files
authored
[AMDGPU] Use S_CSELECT for uniform i1 ext (#69703)
Solves #59869
1 parent de7c006 commit a1260b5

File tree

5 files changed: 107 additions (+107) and 77 deletions (-77).

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 16 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2278,17 +2278,24 @@ def : GCNPat <
22782278
(REG_SEQUENCE SReg_64, $src, sub0, (i32 (IMPLICIT_DEF)), sub1)
22792279
>;
22802280

2281-
class ZExt_i64_i1_Pat <SDNode ext> : GCNPat <
2282-
(i64 (ext i1:$src)),
2283-
(REG_SEQUENCE VReg_64,
2284-
(V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
2285-
/*src1mod*/(i32 0), /*src1*/(i32 1), $src),
2286-
sub0, (S_MOV_B32 (i32 0)), sub1)
2287-
>;
2281+
multiclass ZExt_i64_i1_Pat <SDNode ext> {
2282+
def: GCNPat <
2283+
(i64 (ext i1:$src)),
2284+
(REG_SEQUENCE VReg_64,
2285+
(V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
2286+
/*src1mod*/(i32 0), /*src1*/(i32 1), $src),
2287+
sub0, (S_MOV_B32 (i32 0)), sub1)
2288+
>;
2289+
2290+
def : GCNPat <
2291+
(i64 (UniformUnaryFrag<ext> SCC)),
2292+
(S_CSELECT_B64 (i64 1), (i64 0))
2293+
>;
2294+
}
22882295

22892296

2290-
def : ZExt_i64_i1_Pat<zext>;
2291-
def : ZExt_i64_i1_Pat<anyext>;
2297+
defm : ZExt_i64_i1_Pat<zext>;
2298+
defm : ZExt_i64_i1_Pat<anyext>;
22922299

22932300
// FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that
22942301
// REG_SEQUENCE patterns don't support instructions with multiple outputs.

llvm/test/CodeGen/AMDGPU/saddo.ll

Lines changed: 38 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -29,10 +29,12 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b)
2929
; SI-NEXT: s_mov_b32 s0, s4
3030
; SI-NEXT: s_mov_b32 s1, s5
3131
; SI-NEXT: s_xor_b64 s[4:5], s[6:7], vcc
32-
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
33-
; SI-NEXT: v_mov_b32_e32 v1, s11
34-
; SI-NEXT: v_add_i32_e32 v0, vcc, s10, v0
35-
; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
32+
; SI-NEXT: s_and_b64 s[4:5], s[4:5], exec
33+
; SI-NEXT: s_cselect_b64 s[4:5], 1, 0
34+
; SI-NEXT: s_add_u32 s4, s10, s4
35+
; SI-NEXT: s_addc_u32 s5, s11, s5
36+
; SI-NEXT: v_mov_b32_e32 v0, s4
37+
; SI-NEXT: v_mov_b32_e32 v1, s5
3638
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
3739
; SI-NEXT: s_endpgm
3840
;
@@ -45,15 +47,17 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b)
4547
; VI-NEXT: s_add_u32 s2, s6, s0
4648
; VI-NEXT: v_mov_b32_e32 v2, s7
4749
; VI-NEXT: s_addc_u32 s3, s7, s1
48-
; VI-NEXT: v_cmp_lt_i64_e64 s[8:9], s[0:1], 0
4950
; VI-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[1:2]
50-
; VI-NEXT: v_mov_b32_e32 v3, s3
51-
; VI-NEXT: s_xor_b64 s[0:1], s[8:9], vcc
52-
; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
53-
; VI-NEXT: v_add_u32_e32 v2, vcc, s2, v2
51+
; VI-NEXT: v_cmp_lt_i64_e64 s[0:1], s[0:1], 0
5452
; VI-NEXT: v_mov_b32_e32 v0, s4
53+
; VI-NEXT: s_xor_b64 s[0:1], s[0:1], vcc
54+
; VI-NEXT: s_and_b64 s[0:1], s[0:1], exec
55+
; VI-NEXT: s_cselect_b64 s[0:1], 1, 0
56+
; VI-NEXT: s_add_u32 s0, s2, s0
57+
; VI-NEXT: s_addc_u32 s1, s3, s1
58+
; VI-NEXT: v_mov_b32_e32 v3, s1
5559
; VI-NEXT: v_mov_b32_e32 v1, s5
56-
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
60+
; VI-NEXT: v_mov_b32_e32 v2, s0
5761
; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
5862
; VI-NEXT: s_endpgm
5963
;
@@ -67,13 +71,15 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b)
6771
; GFX9-NEXT: s_add_u32 s0, s6, s2
6872
; GFX9-NEXT: v_mov_b32_e32 v1, s7
6973
; GFX9-NEXT: s_addc_u32 s1, s7, s3
70-
; GFX9-NEXT: v_cmp_lt_i64_e64 s[8:9], s[2:3], 0
7174
; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[0:1], v[0:1]
75+
; GFX9-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], 0
76+
; GFX9-NEXT: s_xor_b64 s[2:3], s[2:3], vcc
77+
; GFX9-NEXT: s_and_b64 s[2:3], s[2:3], exec
78+
; GFX9-NEXT: s_cselect_b64 s[2:3], 1, 0
79+
; GFX9-NEXT: s_add_u32 s0, s0, s2
80+
; GFX9-NEXT: s_addc_u32 s1, s1, s3
81+
; GFX9-NEXT: v_mov_b32_e32 v0, s0
7282
; GFX9-NEXT: v_mov_b32_e32 v1, s1
73-
; GFX9-NEXT: s_xor_b64 s[2:3], s[8:9], vcc
74-
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3]
75-
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
76-
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
7783
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
7884
; GFX9-NEXT: s_endpgm
7985
;
@@ -87,11 +93,14 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b)
8793
; GFX10-NEXT: s_add_u32 s0, s6, s2
8894
; GFX10-NEXT: s_addc_u32 s1, s7, s3
8995
; GFX10-NEXT: v_cmp_lt_i64_e64 s2, s[2:3], 0
90-
; GFX10-NEXT: v_cmp_lt_i64_e64 s3, s[0:1], s[6:7]
91-
; GFX10-NEXT: s_xor_b32 s2, s2, s3
92-
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
93-
; GFX10-NEXT: v_add_co_u32 v0, s0, s0, v0
94-
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
96+
; GFX10-NEXT: v_cmp_lt_i64_e64 s6, s[0:1], s[6:7]
97+
; GFX10-NEXT: s_xor_b32 s2, s2, s6
98+
; GFX10-NEXT: s_and_b32 s2, s2, exec_lo
99+
; GFX10-NEXT: s_cselect_b64 s[2:3], 1, 0
100+
; GFX10-NEXT: s_add_u32 s0, s0, s2
101+
; GFX10-NEXT: s_addc_u32 s1, s1, s3
102+
; GFX10-NEXT: v_mov_b32_e32 v0, s0
103+
; GFX10-NEXT: v_mov_b32_e32 v1, s1
95104
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
96105
; GFX10-NEXT: s_endpgm
97106
;
@@ -100,18 +109,20 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b)
100109
; GFX11-NEXT: s_clause 0x1
101110
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
102111
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
103-
; GFX11-NEXT: v_mov_b32_e32 v2, 0
104112
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
105113
; GFX11-NEXT: s_add_u32 s2, s6, s0
106114
; GFX11-NEXT: s_addc_u32 s3, s7, s1
107115
; GFX11-NEXT: v_cmp_lt_i64_e64 s0, s[0:1], 0
108-
; GFX11-NEXT: v_cmp_lt_i64_e64 s1, s[2:3], s[6:7]
116+
; GFX11-NEXT: v_cmp_lt_i64_e64 s6, s[2:3], s[6:7]
109117
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
110-
; GFX11-NEXT: s_xor_b32 s0, s0, s1
111-
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
112-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
113-
; GFX11-NEXT: v_add_co_u32 v0, s0, s2, v0
114-
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0
118+
; GFX11-NEXT: s_xor_b32 s0, s0, s6
119+
; GFX11-NEXT: s_and_b32 s0, s0, exec_lo
120+
; GFX11-NEXT: s_cselect_b64 s[0:1], 1, 0
121+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
122+
; GFX11-NEXT: s_add_u32 s0, s2, s0
123+
; GFX11-NEXT: s_addc_u32 s1, s3, s1
124+
; GFX11-NEXT: v_mov_b32_e32 v0, s0
125+
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s1
115126
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
116127
; GFX11-NEXT: s_nop 0
117128
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)

llvm/test/CodeGen/AMDGPU/uaddo.ll

Lines changed: 26 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -7,21 +7,23 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
77
; SI-LABEL: s_uaddo_i64_zext:
88
; SI: ; %bb.0:
99
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
10-
; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd
10+
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd
1111
; SI-NEXT: s_mov_b32 s3, 0xf000
12-
; SI-NEXT: s_mov_b32 s2, -1
1312
; SI-NEXT: s_waitcnt lgkmcnt(0)
13+
; SI-NEXT: s_add_u32 s0, s6, s0
14+
; SI-NEXT: v_mov_b32_e32 v0, s6
15+
; SI-NEXT: v_mov_b32_e32 v1, s7
16+
; SI-NEXT: s_addc_u32 s1, s7, s1
17+
; SI-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[0:1]
18+
; SI-NEXT: s_and_b64 s[6:7], vcc, exec
19+
; SI-NEXT: s_cselect_b64 s[6:7], 1, 0
20+
; SI-NEXT: s_add_u32 s6, s0, s6
21+
; SI-NEXT: s_addc_u32 s7, s1, s7
22+
; SI-NEXT: s_mov_b32 s2, -1
1423
; SI-NEXT: s_mov_b32 s0, s4
1524
; SI-NEXT: s_mov_b32 s1, s5
16-
; SI-NEXT: s_add_u32 s4, s6, s8
1725
; SI-NEXT: v_mov_b32_e32 v0, s6
1826
; SI-NEXT: v_mov_b32_e32 v1, s7
19-
; SI-NEXT: s_addc_u32 s5, s7, s9
20-
; SI-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1]
21-
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
22-
; SI-NEXT: v_mov_b32_e32 v1, s5
23-
; SI-NEXT: v_add_i32_e32 v0, vcc, s4, v0
24-
; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2527
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
2628
; SI-NEXT: s_endpgm
2729
;
@@ -30,17 +32,19 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
3032
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
3133
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
3234
; VI-NEXT: s_waitcnt lgkmcnt(0)
33-
; VI-NEXT: v_mov_b32_e32 v2, s6
35+
; VI-NEXT: v_mov_b32_e32 v1, s6
3436
; VI-NEXT: s_add_u32 s0, s6, s0
35-
; VI-NEXT: v_mov_b32_e32 v3, s7
3637
; VI-NEXT: s_addc_u32 s1, s7, s1
37-
; VI-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[2:3]
38-
; VI-NEXT: v_mov_b32_e32 v3, s1
39-
; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
40-
; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2
38+
; VI-NEXT: v_mov_b32_e32 v2, s7
39+
; VI-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[1:2]
4140
; VI-NEXT: v_mov_b32_e32 v0, s4
41+
; VI-NEXT: s_and_b64 s[2:3], vcc, exec
42+
; VI-NEXT: s_cselect_b64 s[2:3], 1, 0
43+
; VI-NEXT: s_add_u32 s0, s0, s2
44+
; VI-NEXT: s_addc_u32 s1, s1, s3
45+
; VI-NEXT: v_mov_b32_e32 v3, s1
4246
; VI-NEXT: v_mov_b32_e32 v1, s5
43-
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
47+
; VI-NEXT: v_mov_b32_e32 v2, s0
4448
; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
4549
; VI-NEXT: s_endpgm
4650
;
@@ -52,13 +56,15 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
5256
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
5357
; GFX9-NEXT: v_mov_b32_e32 v0, s6
5458
; GFX9-NEXT: s_add_u32 s0, s6, s2
55-
; GFX9-NEXT: v_mov_b32_e32 v1, s7
5659
; GFX9-NEXT: s_addc_u32 s1, s7, s3
60+
; GFX9-NEXT: v_mov_b32_e32 v1, s7
5761
; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[0:1]
62+
; GFX9-NEXT: s_and_b64 s[2:3], vcc, exec
63+
; GFX9-NEXT: s_cselect_b64 s[2:3], 1, 0
64+
; GFX9-NEXT: s_add_u32 s0, s0, s2
65+
; GFX9-NEXT: s_addc_u32 s1, s1, s3
66+
; GFX9-NEXT: v_mov_b32_e32 v0, s0
5867
; GFX9-NEXT: v_mov_b32_e32 v1, s1
59-
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
60-
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
61-
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
6268
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
6369
; GFX9-NEXT: s_endpgm
6470
%uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)

llvm/test/CodeGen/AMDGPU/usubo.ll

Lines changed: 26 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -8,21 +8,23 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
88
; SI-LABEL: s_usubo_i64_zext:
99
; SI: ; %bb.0:
1010
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
11-
; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd
11+
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd
1212
; SI-NEXT: s_mov_b32 s3, 0xf000
13-
; SI-NEXT: s_mov_b32 s2, -1
1413
; SI-NEXT: s_waitcnt lgkmcnt(0)
14+
; SI-NEXT: s_sub_u32 s0, s6, s0
15+
; SI-NEXT: v_mov_b32_e32 v0, s6
16+
; SI-NEXT: v_mov_b32_e32 v1, s7
17+
; SI-NEXT: s_subb_u32 s1, s7, s1
18+
; SI-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[0:1]
19+
; SI-NEXT: s_and_b64 s[6:7], vcc, exec
20+
; SI-NEXT: s_cselect_b64 s[6:7], 1, 0
21+
; SI-NEXT: s_add_u32 s6, s0, s6
22+
; SI-NEXT: s_addc_u32 s7, s1, s7
23+
; SI-NEXT: s_mov_b32 s2, -1
1524
; SI-NEXT: s_mov_b32 s0, s4
1625
; SI-NEXT: s_mov_b32 s1, s5
17-
; SI-NEXT: s_sub_u32 s4, s6, s8
1826
; SI-NEXT: v_mov_b32_e32 v0, s6
1927
; SI-NEXT: v_mov_b32_e32 v1, s7
20-
; SI-NEXT: s_subb_u32 s5, s7, s9
21-
; SI-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1]
22-
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
23-
; SI-NEXT: v_mov_b32_e32 v1, s5
24-
; SI-NEXT: v_add_i32_e32 v0, vcc, s4, v0
25-
; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2628
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
2729
; SI-NEXT: s_endpgm
2830
;
@@ -31,17 +33,19 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
3133
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
3234
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
3335
; VI-NEXT: s_waitcnt lgkmcnt(0)
34-
; VI-NEXT: v_mov_b32_e32 v2, s6
36+
; VI-NEXT: v_mov_b32_e32 v1, s6
3537
; VI-NEXT: s_sub_u32 s0, s6, s0
36-
; VI-NEXT: v_mov_b32_e32 v3, s7
3738
; VI-NEXT: s_subb_u32 s1, s7, s1
38-
; VI-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[2:3]
39-
; VI-NEXT: v_mov_b32_e32 v3, s1
40-
; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
41-
; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2
39+
; VI-NEXT: v_mov_b32_e32 v2, s7
40+
; VI-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[1:2]
4241
; VI-NEXT: v_mov_b32_e32 v0, s4
42+
; VI-NEXT: s_and_b64 s[2:3], vcc, exec
43+
; VI-NEXT: s_cselect_b64 s[2:3], 1, 0
44+
; VI-NEXT: s_add_u32 s0, s0, s2
45+
; VI-NEXT: s_addc_u32 s1, s1, s3
46+
; VI-NEXT: v_mov_b32_e32 v3, s1
4347
; VI-NEXT: v_mov_b32_e32 v1, s5
44-
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
48+
; VI-NEXT: v_mov_b32_e32 v2, s0
4549
; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
4650
; VI-NEXT: s_endpgm
4751
;
@@ -53,13 +57,15 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
5357
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
5458
; GFX9-NEXT: v_mov_b32_e32 v0, s6
5559
; GFX9-NEXT: s_sub_u32 s0, s6, s2
56-
; GFX9-NEXT: v_mov_b32_e32 v1, s7
5760
; GFX9-NEXT: s_subb_u32 s1, s7, s3
61+
; GFX9-NEXT: v_mov_b32_e32 v1, s7
5862
; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[0:1]
63+
; GFX9-NEXT: s_and_b64 s[2:3], vcc, exec
64+
; GFX9-NEXT: s_cselect_b64 s[2:3], 1, 0
65+
; GFX9-NEXT: s_add_u32 s0, s0, s2
66+
; GFX9-NEXT: s_addc_u32 s1, s1, s3
67+
; GFX9-NEXT: v_mov_b32_e32 v0, s0
5968
; GFX9-NEXT: v_mov_b32_e32 v1, s1
60-
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
61-
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
62-
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
6369
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
6470
; GFX9-NEXT: s_endpgm
6571
%usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) #0

llvm/test/CodeGen/AMDGPU/zero_extend.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ define amdgpu_kernel void @s_arg_zext_i1_to_i64(ptr addrspace(1) %out, i1 zeroex
3838
; GCN-LABEL: {{^}}s_cmp_zext_i1_to_i64:
3939
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, 0
4040
; GCN-DAG: s_cmp_eq_u32
41-
; GCN: v_cndmask_b32
41+
; GCN: s_cselect_b64 s[{{[0-9]+:[0-9]+}}], 1, 0
4242
define amdgpu_kernel void @s_cmp_zext_i1_to_i64(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
4343
%cmp = icmp eq i32 %a, %b
4444
%ext = zext i1 %cmp to i64

0 commit comments

Comments
 (0)