@@ -29,12 +29,10 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b)
29
29
; SI-NEXT: s_mov_b32 s0, s4
30
30
; SI-NEXT: s_mov_b32 s1, s5
31
31
; SI-NEXT: s_xor_b64 s[4:5], s[6:7], vcc
32
- ; SI-NEXT: s_and_b64 s[4:5], s[4:5], exec
33
- ; SI-NEXT: s_cselect_b64 s[4:5], 1, 0
34
- ; SI-NEXT: s_add_u32 s4, s10, s4
35
- ; SI-NEXT: s_addc_u32 s5, s11, s5
36
- ; SI-NEXT: v_mov_b32_e32 v0, s4
37
- ; SI-NEXT: v_mov_b32_e32 v1, s5
32
+ ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
33
+ ; SI-NEXT: v_mov_b32_e32 v1, s11
34
+ ; SI-NEXT: v_add_i32_e32 v0, vcc, s10, v0
35
+ ; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
38
36
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
39
37
; SI-NEXT: s_endpgm
40
38
;
@@ -47,17 +45,15 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b)
47
45
; VI-NEXT: s_add_u32 s2, s6, s0
48
46
; VI-NEXT: v_mov_b32_e32 v2, s7
49
47
; VI-NEXT: s_addc_u32 s3, s7, s1
48
+ ; VI-NEXT: v_cmp_lt_i64_e64 s[8:9], s[0:1], 0
50
49
; VI-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[1:2]
51
- ; VI-NEXT: v_cmp_lt_i64_e64 s[0:1], s[0:1], 0
50
+ ; VI-NEXT: v_mov_b32_e32 v3, s3
51
+ ; VI-NEXT: s_xor_b64 s[0:1], s[8:9], vcc
52
+ ; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
53
+ ; VI-NEXT: v_add_u32_e32 v2, vcc, s2, v2
52
54
; VI-NEXT: v_mov_b32_e32 v0, s4
53
- ; VI-NEXT: s_xor_b64 s[0:1], s[0:1], vcc
54
- ; VI-NEXT: s_and_b64 s[0:1], s[0:1], exec
55
- ; VI-NEXT: s_cselect_b64 s[0:1], 1, 0
56
- ; VI-NEXT: s_add_u32 s0, s2, s0
57
- ; VI-NEXT: s_addc_u32 s1, s3, s1
58
- ; VI-NEXT: v_mov_b32_e32 v3, s1
59
55
; VI-NEXT: v_mov_b32_e32 v1, s5
60
- ; VI-NEXT: v_mov_b32_e32 v2, s0
56
+ ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
61
57
; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
62
58
; VI-NEXT: s_endpgm
63
59
;
@@ -71,15 +67,13 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b)
71
67
; GFX9-NEXT: s_add_u32 s0, s6, s2
72
68
; GFX9-NEXT: v_mov_b32_e32 v1, s7
73
69
; GFX9-NEXT: s_addc_u32 s1, s7, s3
70
+ ; GFX9-NEXT: v_cmp_lt_i64_e64 s[8:9], s[2:3], 0
74
71
; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[0:1], v[0:1]
75
- ; GFX9-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], 0
76
- ; GFX9-NEXT: s_xor_b64 s[2:3], s[2:3], vcc
77
- ; GFX9-NEXT: s_and_b64 s[2:3], s[2:3], exec
78
- ; GFX9-NEXT: s_cselect_b64 s[2:3], 1, 0
79
- ; GFX9-NEXT: s_add_u32 s0, s0, s2
80
- ; GFX9-NEXT: s_addc_u32 s1, s1, s3
81
- ; GFX9-NEXT: v_mov_b32_e32 v0, s0
82
72
; GFX9-NEXT: v_mov_b32_e32 v1, s1
73
+ ; GFX9-NEXT: s_xor_b64 s[2:3], s[8:9], vcc
74
+ ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3]
75
+ ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
76
+ ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
83
77
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
84
78
; GFX9-NEXT: s_endpgm
85
79
;
@@ -93,14 +87,11 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b)
93
87
; GFX10-NEXT: s_add_u32 s0, s6, s2
94
88
; GFX10-NEXT: s_addc_u32 s1, s7, s3
95
89
; GFX10-NEXT: v_cmp_lt_i64_e64 s2, s[2:3], 0
96
- ; GFX10-NEXT: v_cmp_lt_i64_e64 s6, s[0:1], s[6:7]
97
- ; GFX10-NEXT: s_xor_b32 s2, s2, s6
98
- ; GFX10-NEXT: s_and_b32 s2, s2, exec_lo
99
- ; GFX10-NEXT: s_cselect_b64 s[2:3], 1, 0
100
- ; GFX10-NEXT: s_add_u32 s0, s0, s2
101
- ; GFX10-NEXT: s_addc_u32 s1, s1, s3
102
- ; GFX10-NEXT: v_mov_b32_e32 v0, s0
103
- ; GFX10-NEXT: v_mov_b32_e32 v1, s1
90
+ ; GFX10-NEXT: v_cmp_lt_i64_e64 s3, s[0:1], s[6:7]
91
+ ; GFX10-NEXT: s_xor_b32 s2, s2, s3
92
+ ; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
93
+ ; GFX10-NEXT: v_add_co_u32 v0, s0, s0, v0
94
+ ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
104
95
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
105
96
; GFX10-NEXT: s_endpgm
106
97
;
@@ -109,20 +100,18 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b)
109
100
; GFX11-NEXT: s_clause 0x1
110
101
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
111
102
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
103
+ ; GFX11-NEXT: v_mov_b32_e32 v2, 0
112
104
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
113
105
; GFX11-NEXT: s_add_u32 s2, s6, s0
114
106
; GFX11-NEXT: s_addc_u32 s3, s7, s1
115
107
; GFX11-NEXT: v_cmp_lt_i64_e64 s0, s[0:1], 0
116
- ; GFX11-NEXT: v_cmp_lt_i64_e64 s6 , s[2:3], s[6:7]
108
+ ; GFX11-NEXT: v_cmp_lt_i64_e64 s1 , s[2:3], s[6:7]
117
109
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
118
- ; GFX11-NEXT: s_xor_b32 s0, s0, s6
119
- ; GFX11-NEXT: s_and_b32 s0, s0, exec_lo
120
- ; GFX11-NEXT: s_cselect_b64 s[0:1], 1, 0
121
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
122
- ; GFX11-NEXT: s_add_u32 s0, s2, s0
123
- ; GFX11-NEXT: s_addc_u32 s1, s3, s1
124
- ; GFX11-NEXT: v_mov_b32_e32 v0, s0
125
- ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s1
110
+ ; GFX11-NEXT: s_xor_b32 s0, s0, s1
111
+ ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
112
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
113
+ ; GFX11-NEXT: v_add_co_u32 v0, s0, s2, v0
114
+ ; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0
126
115
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
127
116
; GFX11-NEXT: s_nop 0
128
117
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
0 commit comments