
Commit e9c4dc1

Revert "[AMDGPU] Use S_CSELECT for uniform i1 ext (llvm#69703)"
This reverts commit a1260b5. It was causing some Vulkan CTS failures.
1 parent 64025b8 · commit e9c4dc1

5 files changed: +77, -107 lines changed
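
For context: the reverted change (a1260b5) selected the scalar S_CSELECT_B64 when a uniform i1 held in SCC is zero- or any-extended to i64; with this revert, such extensions again go through the V_CNDMASK_B32-based REG_SEQUENCE pattern. Below is a minimal IR kernel that exercises this path. The signature and the compare/zext lines come from the s_cmp_zext_i1_to_i64 test in zero_extend.ll further down; the store and return are an assumed completion of that truncated test.

define amdgpu_kernel void @s_cmp_zext_i1_to_i64(ptr addrspace(1) %out, i32 %a, i32 %b) {
  ; Comparing two kernel arguments (SGPR values) gives a uniform i1 result.
  %cmp = icmp eq i32 %a, %b
  ; Extending the uniform i1 to i64 is the pattern affected by this revert:
  ; it is lowered via v_cndmask_b32 again instead of s_cselect_b64.
  %ext = zext i1 %cmp to i64
  ; Assumed completion: store the result so the kernel has an observable effect.
  store i64 %ext, ptr addrspace(1) %out
  ret void
}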

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 9 additions & 16 deletions
@@ -2278,24 +2278,17 @@ def : GCNPat <
   (REG_SEQUENCE SReg_64, $src, sub0, (i32 (IMPLICIT_DEF)), sub1)
 >;

-multiclass ZExt_i64_i1_Pat <SDNode ext> {
-  def: GCNPat <
-    (i64 (ext i1:$src)),
-    (REG_SEQUENCE VReg_64,
-      (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
-                         /*src1mod*/(i32 0), /*src1*/(i32 1), $src),
-      sub0, (S_MOV_B32 (i32 0)), sub1)
-  >;
-
-  def : GCNPat <
-    (i64 (UniformUnaryFrag<ext> SCC)),
-    (S_CSELECT_B64 (i64 1), (i64 0))
-  >;
-}
+class ZExt_i64_i1_Pat <SDNode ext> : GCNPat <
+  (i64 (ext i1:$src)),
+  (REG_SEQUENCE VReg_64,
+    (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                       /*src1mod*/(i32 0), /*src1*/(i32 1), $src),
+    sub0, (S_MOV_B32 (i32 0)), sub1)
+>;


-defm : ZExt_i64_i1_Pat<zext>;
-defm : ZExt_i64_i1_Pat<anyext>;
+def : ZExt_i64_i1_Pat<zext>;
+def : ZExt_i64_i1_Pat<anyext>;

 // FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that
 // REG_SEQUENCE patterns don't support instructions with multiple outputs.
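
The test updates below show the practical effect of the revert: where the uniform (SCC) pattern previously kept the whole sequence on the scalar unit, the i1 is now materialized into a VGPR with a VALU select and consumed by VALU add-with-carry instructions. As an illustration, the relevant GFX9 check lines from s_uaddo_i64_zext further down (register numbers as in that test):

; Expected before this revert (S_CSELECT_B64-based lowering, removed here):
s_and_b64 s[2:3], vcc, exec
s_cselect_b64 s[2:3], 1, 0
s_add_u32 s0, s0, s2
s_addc_u32 s1, s1, s3

; Expected after this revert (V_CNDMASK_B32-based lowering, restored here):
v_cndmask_b32_e64 v0, 0, 1, vcc
v_add_co_u32_e32 v0, vcc, s0, v0
v_addc_co_u32_e32 v1, vcc, 0, v1, vcc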

llvm/test/CodeGen/AMDGPU/saddo.ll

Lines changed: 27 additions & 38 deletions
@@ -29,12 +29,10 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b)
 ; SI-NEXT: s_mov_b32 s0, s4
 ; SI-NEXT: s_mov_b32 s1, s5
 ; SI-NEXT: s_xor_b64 s[4:5], s[6:7], vcc
-; SI-NEXT: s_and_b64 s[4:5], s[4:5], exec
-; SI-NEXT: s_cselect_b64 s[4:5], 1, 0
-; SI-NEXT: s_add_u32 s4, s10, s4
-; SI-NEXT: s_addc_u32 s5, s11, s5
-; SI-NEXT: v_mov_b32_e32 v0, s4
-; SI-NEXT: v_mov_b32_e32 v1, s5
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SI-NEXT: v_mov_b32_e32 v1, s11
+; SI-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
@@ -47,17 +45,15 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b)
 ; VI-NEXT: s_add_u32 s2, s6, s0
 ; VI-NEXT: v_mov_b32_e32 v2, s7
 ; VI-NEXT: s_addc_u32 s3, s7, s1
+; VI-NEXT: v_cmp_lt_i64_e64 s[8:9], s[0:1], 0
 ; VI-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[1:2]
-; VI-NEXT: v_cmp_lt_i64_e64 s[0:1], s[0:1], 0
+; VI-NEXT: v_mov_b32_e32 v3, s3
+; VI-NEXT: s_xor_b64 s[0:1], s[8:9], vcc
+; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
+; VI-NEXT: v_add_u32_e32 v2, vcc, s2, v2
 ; VI-NEXT: v_mov_b32_e32 v0, s4
-; VI-NEXT: s_xor_b64 s[0:1], s[0:1], vcc
-; VI-NEXT: s_and_b64 s[0:1], s[0:1], exec
-; VI-NEXT: s_cselect_b64 s[0:1], 1, 0
-; VI-NEXT: s_add_u32 s0, s2, s0
-; VI-NEXT: s_addc_u32 s1, s3, s1
-; VI-NEXT: v_mov_b32_e32 v3, s1
 ; VI-NEXT: v_mov_b32_e32 v1, s5
-; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; VI-NEXT: s_endpgm
 ;
@@ -71,15 +67,13 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b)
 ; GFX9-NEXT: s_add_u32 s0, s6, s2
 ; GFX9-NEXT: v_mov_b32_e32 v1, s7
 ; GFX9-NEXT: s_addc_u32 s1, s7, s3
+; GFX9-NEXT: v_cmp_lt_i64_e64 s[8:9], s[2:3], 0
 ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[0:1], v[0:1]
-; GFX9-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], 0
-; GFX9-NEXT: s_xor_b64 s[2:3], s[2:3], vcc
-; GFX9-NEXT: s_and_b64 s[2:3], s[2:3], exec
-; GFX9-NEXT: s_cselect_b64 s[2:3], 1, 0
-; GFX9-NEXT: s_add_u32 s0, s0, s2
-; GFX9-NEXT: s_addc_u32 s1, s1, s3
-; GFX9-NEXT: v_mov_b32_e32 v0, s0
 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: s_xor_b64 s[2:3], s[8:9], vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3]
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
 ; GFX9-NEXT: s_endpgm
 ;
@@ -93,14 +87,11 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b)
 ; GFX10-NEXT: s_add_u32 s0, s6, s2
 ; GFX10-NEXT: s_addc_u32 s1, s7, s3
 ; GFX10-NEXT: v_cmp_lt_i64_e64 s2, s[2:3], 0
-; GFX10-NEXT: v_cmp_lt_i64_e64 s6, s[0:1], s[6:7]
-; GFX10-NEXT: s_xor_b32 s2, s2, s6
-; GFX10-NEXT: s_and_b32 s2, s2, exec_lo
-; GFX10-NEXT: s_cselect_b64 s[2:3], 1, 0
-; GFX10-NEXT: s_add_u32 s0, s0, s2
-; GFX10-NEXT: s_addc_u32 s1, s1, s3
-; GFX10-NEXT: v_mov_b32_e32 v0, s0
-; GFX10-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-NEXT: v_cmp_lt_i64_e64 s3, s[0:1], s[6:7]
+; GFX10-NEXT: s_xor_b32 s2, s2, s3
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
+; GFX10-NEXT: v_add_co_u32 v0, s0, s0, v0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
 ; GFX10-NEXT: s_endpgm
 ;
@@ -109,20 +100,18 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b)
 ; GFX11-NEXT: s_clause 0x1
 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT: s_add_u32 s2, s6, s0
 ; GFX11-NEXT: s_addc_u32 s3, s7, s1
 ; GFX11-NEXT: v_cmp_lt_i64_e64 s0, s[0:1], 0
-; GFX11-NEXT: v_cmp_lt_i64_e64 s6, s[2:3], s[6:7]
+; GFX11-NEXT: v_cmp_lt_i64_e64 s1, s[2:3], s[6:7]
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: s_xor_b32 s0, s0, s6
-; GFX11-NEXT: s_and_b32 s0, s0, exec_lo
-; GFX11-NEXT: s_cselect_b64 s[0:1], 1, 0
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: s_add_u32 s0, s2, s0
-; GFX11-NEXT: s_addc_u32 s1, s3, s1
-; GFX11-NEXT: v_mov_b32_e32 v0, s0
-; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s1
+; GFX11-NEXT: s_xor_b32 s0, s0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_add_co_u32 v0, s0, s2, v0
+; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0
 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
 ; GFX11-NEXT: s_nop 0
 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)

llvm/test/CodeGen/AMDGPU/uaddo.ll

Lines changed: 20 additions & 26 deletions
@@ -7,23 +7,21 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
 ; SI-LABEL: s_uaddo_i64_zext:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
-; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd
+; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd
 ; SI-NEXT: s_mov_b32 s3, 0xf000
-; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_add_u32 s0, s6, s0
-; SI-NEXT: v_mov_b32_e32 v0, s6
-; SI-NEXT: v_mov_b32_e32 v1, s7
-; SI-NEXT: s_addc_u32 s1, s7, s1
-; SI-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[0:1]
-; SI-NEXT: s_and_b64 s[6:7], vcc, exec
-; SI-NEXT: s_cselect_b64 s[6:7], 1, 0
-; SI-NEXT: s_add_u32 s6, s0, s6
-; SI-NEXT: s_addc_u32 s7, s1, s7
 ; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
 ; SI-NEXT: s_mov_b32 s0, s4
 ; SI-NEXT: s_mov_b32 s1, s5
+; SI-NEXT: s_add_u32 s4, s6, s8
 ; SI-NEXT: v_mov_b32_e32 v0, s6
 ; SI-NEXT: v_mov_b32_e32 v1, s7
+; SI-NEXT: s_addc_u32 s5, s7, s9
+; SI-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1]
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT: v_mov_b32_e32 v1, s5
+; SI-NEXT: v_add_i32_e32 v0, vcc, s4, v0
+; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
@@ -32,19 +30,17 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
 ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_mov_b32_e32 v1, s6
+; VI-NEXT: v_mov_b32_e32 v2, s6
 ; VI-NEXT: s_add_u32 s0, s6, s0
+; VI-NEXT: v_mov_b32_e32 v3, s7
 ; VI-NEXT: s_addc_u32 s1, s7, s1
-; VI-NEXT: v_mov_b32_e32 v2, s7
-; VI-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[1:2]
-; VI-NEXT: v_mov_b32_e32 v0, s4
-; VI-NEXT: s_and_b64 s[2:3], vcc, exec
-; VI-NEXT: s_cselect_b64 s[2:3], 1, 0
-; VI-NEXT: s_add_u32 s0, s0, s2
-; VI-NEXT: s_addc_u32 s1, s1, s3
+; VI-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[2:3]
 ; VI-NEXT: v_mov_b32_e32 v3, s1
+; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2
+; VI-NEXT: v_mov_b32_e32 v0, s4
 ; VI-NEXT: v_mov_b32_e32 v1, s5
-; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; VI-NEXT: s_endpgm
 ;
@@ -56,15 +52,13 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT: v_mov_b32_e32 v0, s6
 ; GFX9-NEXT: s_add_u32 s0, s6, s2
-; GFX9-NEXT: s_addc_u32 s1, s7, s3
 ; GFX9-NEXT: v_mov_b32_e32 v1, s7
+; GFX9-NEXT: s_addc_u32 s1, s7, s3
 ; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[0:1]
-; GFX9-NEXT: s_and_b64 s[2:3], vcc, exec
-; GFX9-NEXT: s_cselect_b64 s[2:3], 1, 0
-; GFX9-NEXT: s_add_u32 s0, s0, s2
-; GFX9-NEXT: s_addc_u32 s1, s1, s3
-; GFX9-NEXT: v_mov_b32_e32 v0, s0
 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
 ; GFX9-NEXT: s_endpgm
 %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)

llvm/test/CodeGen/AMDGPU/usubo.ll

Lines changed: 20 additions & 26 deletions
@@ -8,23 +8,21 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
 ; SI-LABEL: s_usubo_i64_zext:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
-; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd
+; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd
 ; SI-NEXT: s_mov_b32 s3, 0xf000
-; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_sub_u32 s0, s6, s0
-; SI-NEXT: v_mov_b32_e32 v0, s6
-; SI-NEXT: v_mov_b32_e32 v1, s7
-; SI-NEXT: s_subb_u32 s1, s7, s1
-; SI-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[0:1]
-; SI-NEXT: s_and_b64 s[6:7], vcc, exec
-; SI-NEXT: s_cselect_b64 s[6:7], 1, 0
-; SI-NEXT: s_add_u32 s6, s0, s6
-; SI-NEXT: s_addc_u32 s7, s1, s7
 ; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
 ; SI-NEXT: s_mov_b32 s0, s4
 ; SI-NEXT: s_mov_b32 s1, s5
+; SI-NEXT: s_sub_u32 s4, s6, s8
 ; SI-NEXT: v_mov_b32_e32 v0, s6
 ; SI-NEXT: v_mov_b32_e32 v1, s7
+; SI-NEXT: s_subb_u32 s5, s7, s9
+; SI-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1]
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT: v_mov_b32_e32 v1, s5
+; SI-NEXT: v_add_i32_e32 v0, vcc, s4, v0
+; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
@@ -33,19 +31,17 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
 ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_mov_b32_e32 v1, s6
+; VI-NEXT: v_mov_b32_e32 v2, s6
 ; VI-NEXT: s_sub_u32 s0, s6, s0
+; VI-NEXT: v_mov_b32_e32 v3, s7
 ; VI-NEXT: s_subb_u32 s1, s7, s1
-; VI-NEXT: v_mov_b32_e32 v2, s7
-; VI-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[1:2]
-; VI-NEXT: v_mov_b32_e32 v0, s4
-; VI-NEXT: s_and_b64 s[2:3], vcc, exec
-; VI-NEXT: s_cselect_b64 s[2:3], 1, 0
-; VI-NEXT: s_add_u32 s0, s0, s2
-; VI-NEXT: s_addc_u32 s1, s1, s3
+; VI-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[2:3]
 ; VI-NEXT: v_mov_b32_e32 v3, s1
+; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2
+; VI-NEXT: v_mov_b32_e32 v0, s4
 ; VI-NEXT: v_mov_b32_e32 v1, s5
-; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; VI-NEXT: s_endpgm
 ;
@@ -57,15 +53,13 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT: v_mov_b32_e32 v0, s6
 ; GFX9-NEXT: s_sub_u32 s0, s6, s2
-; GFX9-NEXT: s_subb_u32 s1, s7, s3
 ; GFX9-NEXT: v_mov_b32_e32 v1, s7
+; GFX9-NEXT: s_subb_u32 s1, s7, s3
 ; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[0:1]
-; GFX9-NEXT: s_and_b64 s[2:3], vcc, exec
-; GFX9-NEXT: s_cselect_b64 s[2:3], 1, 0
-; GFX9-NEXT: s_add_u32 s0, s0, s2
-; GFX9-NEXT: s_addc_u32 s1, s1, s3
-; GFX9-NEXT: v_mov_b32_e32 v0, s0
 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
 ; GFX9-NEXT: s_endpgm
 %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) #0

llvm/test/CodeGen/AMDGPU/zero_extend.ll

Lines changed: 1 addition & 1 deletion
@@ -38,7 +38,7 @@ define amdgpu_kernel void @s_arg_zext_i1_to_i64(ptr addrspace(1) %out, i1 zeroex
 ; GCN-LABEL: {{^}}s_cmp_zext_i1_to_i64:
 ; GCN-DAG: s_mov_b32 s{{[0-9]+}}, 0
 ; GCN-DAG: s_cmp_eq_u32
-; GCN: s_cselect_b64 s[{{[0-9]+:[0-9]+}}], 1, 0
+; GCN: v_cndmask_b32
 define amdgpu_kernel void @s_cmp_zext_i1_to_i64(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
   %cmp = icmp eq i32 %a, %b
   %ext = zext i1 %cmp to i64
