Skip to content

Commit b006c30

Browse files
committed
fix lit tests after rebase to main.
1 parent 72246b4 commit b006c30

15 files changed

+39903
-18904
lines changed

llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll

Lines changed: 4710 additions & 1934 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll

Lines changed: 16262 additions & 4405 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/ds-combine-with-dependence.ll

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:26 offset1:27
1010
; GCN-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:7 offset1:8
1111
; GCN: s_waitcnt lgkmcnt({{[0-9]+}})
12-
define amdgpu_kernel void @ds_combine_nodep(ptr addrspace(1) %out, ptr addrspace(3) %inptr) {
12+
define amdgpu_kernel void @ds_combine_nodep(ptr addrspace(1) %out, ptr addrspace(3) %inptr) #0 {
1313

1414
%addr0 = getelementptr i8, ptr addrspace(3) %inptr, i32 24
1515
%load0 = load <3 x float>, ptr addrspace(3) %addr0, align 4
@@ -36,8 +36,8 @@ define amdgpu_kernel void @ds_combine_nodep(ptr addrspace(1) %out, ptr addrspace
3636
; GCN-LABEL: {{^}}ds_combine_WAR
3737

3838
; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:7 offset1:27
39-
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:26 offset1:27
40-
define amdgpu_kernel void @ds_combine_WAR(ptr addrspace(1) %out, ptr addrspace(3) %inptr) {
39+
; GCN-NEXT: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:26 offset1:27
40+
define amdgpu_kernel void @ds_combine_WAR(ptr addrspace(1) %out, ptr addrspace(3) %inptr) #0 {
4141

4242
%addr0 = getelementptr i8, ptr addrspace(3) %inptr, i32 100
4343
%load0 = load <3 x float>, ptr addrspace(3) %addr0, align 4
@@ -67,7 +67,7 @@ define amdgpu_kernel void @ds_combine_WAR(ptr addrspace(1) %out, ptr addrspace(3
6767
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:26 offset1:27
6868
; GCN-NEXT: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
6969
; GCN-NEXT: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:104
70-
define amdgpu_kernel void @ds_combine_RAW(ptr addrspace(1) %out, ptr addrspace(3) %inptr) {
70+
define amdgpu_kernel void @ds_combine_RAW(ptr addrspace(1) %out, ptr addrspace(3) %inptr) #0 {
7171

7272
%addr0 = getelementptr i8, ptr addrspace(3) %inptr, i32 24
7373
%load0 = load <3 x float>, ptr addrspace(3) %addr0, align 4
@@ -96,7 +96,7 @@ define amdgpu_kernel void @ds_combine_RAW(ptr addrspace(1) %out, ptr addrspace(3
9696
; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:108
9797
; GCN-NEXT: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:26 offset1:27
9898
; GCN-NEXT: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:104
99-
define amdgpu_kernel void @ds_combine_WAR_RAW(ptr addrspace(1) %out, ptr addrspace(3) %inptr) {
99+
define amdgpu_kernel void @ds_combine_WAR_RAW(ptr addrspace(1) %out, ptr addrspace(3) %inptr) #0 {
100100

101101
%addr0 = getelementptr i8, ptr addrspace(3) %inptr, i32 100
102102
%load0 = load <3 x float>, ptr addrspace(3) %addr0, align 4
@@ -115,3 +115,5 @@ define amdgpu_kernel void @ds_combine_WAR_RAW(ptr addrspace(1) %out, ptr addrspa
115115
store float %sum, ptr addrspace(1) %out, align 4
116116
ret void
117117
}
118+
119+
attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }

llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll

Lines changed: 0 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -169,35 +169,6 @@ define amdgpu_kernel void @s_test_canonicalize_var_f16(ptr addrspace(1) %out, i1
169169
ret void
170170
}
171171

172-
define half @s_test_canonicalize_arg(half %x) #1 {
173-
; VI-LABEL: s_test_canonicalize_arg:
174-
; VI: ; %bb.0:
175-
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176-
; VI-NEXT: v_max_f16_e32 v0, v0, v0
177-
; VI-NEXT: s_setpc_b64 s[30:31]
178-
;
179-
; GFX9-LABEL: s_test_canonicalize_arg:
180-
; GFX9: ; %bb.0:
181-
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182-
; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
183-
; GFX9-NEXT: s_setpc_b64 s[30:31]
184-
;
185-
; CI-LABEL: s_test_canonicalize_arg:
186-
; CI: ; %bb.0:
187-
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
188-
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
189-
; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
190-
; CI-NEXT: s_setpc_b64 s[30:31]
191-
;
192-
; GFX11-LABEL: s_test_canonicalize_arg:
193-
; GFX11: ; %bb.0:
194-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
195-
; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
196-
; GFX11-NEXT: s_setpc_b64 s[30:31]
197-
%canonicalized = call half @llvm.canonicalize.f16(half %x)
198-
ret half %canonicalized
199-
}
200-
201172
define <2 x half> @v_test_canonicalize_build_vector_v2f16(half %lo, half %hi) #1 {
202173
; VI-LABEL: v_test_canonicalize_build_vector_v2f16:
203174
; VI: ; %bb.0:

llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll

Lines changed: 0 additions & 258 deletions
Original file line number | Diff line number | Diff line change
@@ -2256,264 +2256,6 @@ main_body:
22562256
ret double %ret
22572257
}
22582258

2259-
define double @flat_atomic_fadd_f64_intrinsic_rtn__posoffset(ptr %ptr, double %data) #1 {
2260-
; GFX90A-LABEL: flat_atomic_fadd_f64_intrinsic_rtn__posoffset:
2261-
; GFX90A: ; %bb.0:
2262-
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2263-
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
2264-
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2265-
; GFX90A-NEXT: s_setpc_b64 s[30:31]
2266-
;
2267-
; GFX940-LABEL: flat_atomic_fadd_f64_intrinsic_rtn__posoffset:
2268-
; GFX940: ; %bb.0:
2269-
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2270-
; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0
2271-
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2272-
; GFX940-NEXT: s_setpc_b64 s[30:31]
2273-
%gep = getelementptr double, ptr %ptr, i64 511
2274-
%ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data)
2275-
ret double %ret
2276-
}
2277-
2278-
define double @flat_atomic_fadd_f64_intrinsic_rtn__negoffset(ptr %ptr, double %data) #1 {
2279-
; GFX90A-LABEL: flat_atomic_fadd_f64_intrinsic_rtn__negoffset:
2280-
; GFX90A: ; %bb.0:
2281-
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2282-
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2283-
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2284-
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
2285-
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2286-
; GFX90A-NEXT: s_setpc_b64 s[30:31]
2287-
;
2288-
; GFX940-LABEL: flat_atomic_fadd_f64_intrinsic_rtn__negoffset:
2289-
; GFX940: ; %bb.0:
2290-
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2291-
; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2292-
; GFX940-NEXT: s_nop 1
2293-
; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2294-
; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0
2295-
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2296-
; GFX940-NEXT: s_setpc_b64 s[30:31]
2297-
%gep = getelementptr double, ptr %ptr, i64 -511
2298-
%ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %gep, double %data)
2299-
ret double %ret
2300-
}
2301-
2302-
define void @flat_atomic_fadd_f64_intrinsic_noret__posoffset(ptr %ptr, double %data) #1 {
2303-
; GFX90A-LABEL: flat_atomic_fadd_f64_intrinsic_noret__posoffset:
2304-
; GFX90A: ; %bb.0:
2305-
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2306-
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
2307-
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2308-
; GFX90A-NEXT: s_setpc_b64 s[30:31]
2309-
;
2310-
; GFX940-LABEL: flat_atomic_fadd_f64_intrinsic_noret__posoffset:
2311-
; GFX940: ; %bb.0:
2312-
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2313-
; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
2314-
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2315-
; GFX940-NEXT: s_setpc_b64 s[30:31]
2316-
%gep = getelementptr double, ptr %ptr, i64 511
2317-
%unused = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data)
2318-
ret void
2319-
}
2320-
2321-
define void @flat_atomic_fadd_f64_intrinsic_noret__negoffset(ptr %ptr, double %data) #1 {
2322-
; GFX90A-LABEL: flat_atomic_fadd_f64_intrinsic_noret__negoffset:
2323-
; GFX90A: ; %bb.0:
2324-
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2325-
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2326-
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2327-
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
2328-
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2329-
; GFX90A-NEXT: s_setpc_b64 s[30:31]
2330-
;
2331-
; GFX940-LABEL: flat_atomic_fadd_f64_intrinsic_noret__negoffset:
2332-
; GFX940: ; %bb.0:
2333-
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2334-
; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2335-
; GFX940-NEXT: s_nop 1
2336-
; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2337-
; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
2338-
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2339-
; GFX940-NEXT: s_setpc_b64 s[30:31]
2340-
%gep = getelementptr double, ptr %ptr, i64 -511
2341-
%unused = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %gep, double %data)
2342-
ret void
2343-
}
2344-
2345-
define double @flat_atomic_fmin_f64_intrinsic_rtn__posoffset(ptr %ptr, double %data) #1 {
2346-
; GFX90A-LABEL: flat_atomic_fmin_f64_intrinsic_rtn__posoffset:
2347-
; GFX90A: ; %bb.0:
2348-
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2349-
; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc
2350-
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2351-
; GFX90A-NEXT: s_setpc_b64 s[30:31]
2352-
;
2353-
; GFX940-LABEL: flat_atomic_fmin_f64_intrinsic_rtn__posoffset:
2354-
; GFX940: ; %bb.0:
2355-
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2356-
; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] sc0
2357-
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2358-
; GFX940-NEXT: s_setpc_b64 s[30:31]
2359-
%gep = getelementptr double, ptr %ptr, i64 511
2360-
%ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data)
2361-
ret double %ret
2362-
}
2363-
2364-
define double @flat_atomic_fmin_f64_intrinsic_rtn__negoffset(ptr %ptr, double %data) #1 {
2365-
; GFX90A-LABEL: flat_atomic_fmin_f64_intrinsic_rtn__negoffset:
2366-
; GFX90A: ; %bb.0:
2367-
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2368-
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2369-
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2370-
; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc
2371-
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2372-
; GFX90A-NEXT: s_setpc_b64 s[30:31]
2373-
;
2374-
; GFX940-LABEL: flat_atomic_fmin_f64_intrinsic_rtn__negoffset:
2375-
; GFX940: ; %bb.0:
2376-
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2377-
; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2378-
; GFX940-NEXT: s_nop 1
2379-
; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2380-
; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] sc0
2381-
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2382-
; GFX940-NEXT: s_setpc_b64 s[30:31]
2383-
%gep = getelementptr double, ptr %ptr, i64 -511
2384-
%ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %gep, double %data)
2385-
ret double %ret
2386-
}
2387-
2388-
define void @flat_atomic_fmin_f64_intrinsic_noret__posoffset(ptr %ptr, double %data) #1 {
2389-
; GFX90A-LABEL: flat_atomic_fmin_f64_intrinsic_noret__posoffset:
2390-
; GFX90A: ; %bb.0:
2391-
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2392-
; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
2393-
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2394-
; GFX90A-NEXT: s_setpc_b64 s[30:31]
2395-
;
2396-
; GFX940-LABEL: flat_atomic_fmin_f64_intrinsic_noret__posoffset:
2397-
; GFX940: ; %bb.0:
2398-
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2399-
; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
2400-
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2401-
; GFX940-NEXT: s_setpc_b64 s[30:31]
2402-
%gep = getelementptr double, ptr %ptr, i64 511
2403-
%unused = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data)
2404-
ret void
2405-
}
2406-
2407-
define void @flat_atomic_fmin_f64_intrinsic_noret__negoffset(ptr %ptr, double %data) #1 {
2408-
; GFX90A-LABEL: flat_atomic_fmin_f64_intrinsic_noret__negoffset:
2409-
; GFX90A: ; %bb.0:
2410-
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2411-
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2412-
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2413-
; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
2414-
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2415-
; GFX90A-NEXT: s_setpc_b64 s[30:31]
2416-
;
2417-
; GFX940-LABEL: flat_atomic_fmin_f64_intrinsic_noret__negoffset:
2418-
; GFX940: ; %bb.0:
2419-
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2420-
; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2421-
; GFX940-NEXT: s_nop 1
2422-
; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2423-
; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
2424-
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2425-
; GFX940-NEXT: s_setpc_b64 s[30:31]
2426-
%gep = getelementptr double, ptr %ptr, i64 -511
2427-
%unused = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %gep, double %data)
2428-
ret void
2429-
}
2430-
2431-
define double @flat_atomic_fmax_f64_intrinsic_rtn__posoffset(ptr %ptr, double %data) #1 {
2432-
; GFX90A-LABEL: flat_atomic_fmax_f64_intrinsic_rtn__posoffset:
2433-
; GFX90A: ; %bb.0:
2434-
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2435-
; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] glc
2436-
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2437-
; GFX90A-NEXT: s_setpc_b64 s[30:31]
2438-
;
2439-
; GFX940-LABEL: flat_atomic_fmax_f64_intrinsic_rtn__posoffset:
2440-
; GFX940: ; %bb.0:
2441-
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2442-
; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] sc0
2443-
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2444-
; GFX940-NEXT: s_setpc_b64 s[30:31]
2445-
%gep = getelementptr double, ptr %ptr, i64 511
2446-
%ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data)
2447-
ret double %ret
2448-
}
2449-
2450-
define double @flat_atomic_fmax_f64_intrinsic_rtn__negoffset(ptr %ptr, double %data) #1 {
2451-
; GFX90A-LABEL: flat_atomic_fmax_f64_intrinsic_rtn__negoffset:
2452-
; GFX90A: ; %bb.0:
2453-
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2454-
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2455-
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2456-
; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] glc
2457-
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2458-
; GFX90A-NEXT: s_setpc_b64 s[30:31]
2459-
;
2460-
; GFX940-LABEL: flat_atomic_fmax_f64_intrinsic_rtn__negoffset:
2461-
; GFX940: ; %bb.0:
2462-
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2463-
; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2464-
; GFX940-NEXT: s_nop 1
2465-
; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2466-
; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] sc0
2467-
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2468-
; GFX940-NEXT: s_setpc_b64 s[30:31]
2469-
%gep = getelementptr double, ptr %ptr, i64 -511
2470-
%ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %gep, double %data)
2471-
ret double %ret
2472-
}
2473-
2474-
define void @flat_atomic_fmax_f64_intrinsic_noret__posoffset(ptr %ptr, double %data) #1 {
2475-
; GFX90A-LABEL: flat_atomic_fmax_f64_intrinsic_noret__posoffset:
2476-
; GFX90A: ; %bb.0:
2477-
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2478-
; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
2479-
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2480-
; GFX90A-NEXT: s_setpc_b64 s[30:31]
2481-
;
2482-
; GFX940-LABEL: flat_atomic_fmax_f64_intrinsic_noret__posoffset:
2483-
; GFX940: ; %bb.0:
2484-
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2485-
; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
2486-
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2487-
; GFX940-NEXT: s_setpc_b64 s[30:31]
2488-
%gep = getelementptr double, ptr %ptr, i64 511
2489-
%unused = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data)
2490-
ret void
2491-
}
2492-
2493-
define void @flat_atomic_fmax_f64_intrinsic_noret__negoffset(ptr %ptr, double %data) #1 {
2494-
; GFX90A-LABEL: flat_atomic_fmax_f64_intrinsic_noret__negoffset:
2495-
; GFX90A: ; %bb.0:
2496-
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2497-
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2498-
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2499-
; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
2500-
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2501-
; GFX90A-NEXT: s_setpc_b64 s[30:31]
2502-
;
2503-
; GFX940-LABEL: flat_atomic_fmax_f64_intrinsic_noret__negoffset:
2504-
; GFX940: ; %bb.0:
2505-
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2506-
; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2507-
; GFX940-NEXT: s_nop 1
2508-
; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2509-
; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
2510-
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2511-
; GFX940-NEXT: s_setpc_b64 s[30:31]
2512-
%gep = getelementptr double, ptr %ptr, i64 -511
2513-
%unused = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %gep, double %data)
2514-
ret void
2515-
}
2516-
25172259
attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
25182260
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" }
25192261
attributes #2 = { "denormal-fp-math"="ieee,ieee" "amdgpu-unsafe-fp-atomics"="true" }

0 commit comments

Comments
 (0)