@@ -2256,264 +2256,6 @@ main_body:
2256
2256
ret double %ret
2257
2257
}
2258
2258
2259
- define double @flat_atomic_fadd_f64_intrinsic_rtn__posoffset (ptr %ptr , double %data ) #1 {
2260
- ; GFX90A-LABEL: flat_atomic_fadd_f64_intrinsic_rtn__posoffset:
2261
- ; GFX90A: ; %bb.0:
2262
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2263
- ; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
2264
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2265
- ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2266
- ;
2267
- ; GFX940-LABEL: flat_atomic_fadd_f64_intrinsic_rtn__posoffset:
2268
- ; GFX940: ; %bb.0:
2269
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2270
- ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0
2271
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2272
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
2273
- %gep = getelementptr double , ptr %ptr , i64 511
2274
- %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64 (ptr %ptr , double %data )
2275
- ret double %ret
2276
- }
2277
-
2278
- define double @flat_atomic_fadd_f64_intrinsic_rtn__negoffset (ptr %ptr , double %data ) #1 {
2279
- ; GFX90A-LABEL: flat_atomic_fadd_f64_intrinsic_rtn__negoffset:
2280
- ; GFX90A: ; %bb.0:
2281
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2282
- ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2283
- ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2284
- ; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
2285
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2286
- ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2287
- ;
2288
- ; GFX940-LABEL: flat_atomic_fadd_f64_intrinsic_rtn__negoffset:
2289
- ; GFX940: ; %bb.0:
2290
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2291
- ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2292
- ; GFX940-NEXT: s_nop 1
2293
- ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2294
- ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0
2295
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2296
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
2297
- %gep = getelementptr double , ptr %ptr , i64 -511
2298
- %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64 (ptr %gep , double %data )
2299
- ret double %ret
2300
- }
2301
-
2302
- define void @flat_atomic_fadd_f64_intrinsic_noret__posoffset (ptr %ptr , double %data ) #1 {
2303
- ; GFX90A-LABEL: flat_atomic_fadd_f64_intrinsic_noret__posoffset:
2304
- ; GFX90A: ; %bb.0:
2305
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2306
- ; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
2307
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2308
- ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2309
- ;
2310
- ; GFX940-LABEL: flat_atomic_fadd_f64_intrinsic_noret__posoffset:
2311
- ; GFX940: ; %bb.0:
2312
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2313
- ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
2314
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2315
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
2316
- %gep = getelementptr double , ptr %ptr , i64 511
2317
- %unused = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64 (ptr %ptr , double %data )
2318
- ret void
2319
- }
2320
-
2321
- define void @flat_atomic_fadd_f64_intrinsic_noret__negoffset (ptr %ptr , double %data ) #1 {
2322
- ; GFX90A-LABEL: flat_atomic_fadd_f64_intrinsic_noret__negoffset:
2323
- ; GFX90A: ; %bb.0:
2324
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2325
- ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2326
- ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2327
- ; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
2328
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2329
- ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2330
- ;
2331
- ; GFX940-LABEL: flat_atomic_fadd_f64_intrinsic_noret__negoffset:
2332
- ; GFX940: ; %bb.0:
2333
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2334
- ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2335
- ; GFX940-NEXT: s_nop 1
2336
- ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2337
- ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
2338
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2339
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
2340
- %gep = getelementptr double , ptr %ptr , i64 -511
2341
- %unused = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64 (ptr %gep , double %data )
2342
- ret void
2343
- }
2344
-
2345
- define double @flat_atomic_fmin_f64_intrinsic_rtn__posoffset (ptr %ptr , double %data ) #1 {
2346
- ; GFX90A-LABEL: flat_atomic_fmin_f64_intrinsic_rtn__posoffset:
2347
- ; GFX90A: ; %bb.0:
2348
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2349
- ; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc
2350
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2351
- ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2352
- ;
2353
- ; GFX940-LABEL: flat_atomic_fmin_f64_intrinsic_rtn__posoffset:
2354
- ; GFX940: ; %bb.0:
2355
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2356
- ; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] sc0
2357
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2358
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
2359
- %gep = getelementptr double , ptr %ptr , i64 511
2360
- %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64 (ptr %ptr , double %data )
2361
- ret double %ret
2362
- }
2363
-
2364
- define double @flat_atomic_fmin_f64_intrinsic_rtn__negoffset (ptr %ptr , double %data ) #1 {
2365
- ; GFX90A-LABEL: flat_atomic_fmin_f64_intrinsic_rtn__negoffset:
2366
- ; GFX90A: ; %bb.0:
2367
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2368
- ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2369
- ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2370
- ; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc
2371
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2372
- ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2373
- ;
2374
- ; GFX940-LABEL: flat_atomic_fmin_f64_intrinsic_rtn__negoffset:
2375
- ; GFX940: ; %bb.0:
2376
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2377
- ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2378
- ; GFX940-NEXT: s_nop 1
2379
- ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2380
- ; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] sc0
2381
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2382
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
2383
- %gep = getelementptr double , ptr %ptr , i64 -511
2384
- %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64 (ptr %gep , double %data )
2385
- ret double %ret
2386
- }
2387
-
2388
- define void @flat_atomic_fmin_f64_intrinsic_noret__posoffset (ptr %ptr , double %data ) #1 {
2389
- ; GFX90A-LABEL: flat_atomic_fmin_f64_intrinsic_noret__posoffset:
2390
- ; GFX90A: ; %bb.0:
2391
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2392
- ; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
2393
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2394
- ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2395
- ;
2396
- ; GFX940-LABEL: flat_atomic_fmin_f64_intrinsic_noret__posoffset:
2397
- ; GFX940: ; %bb.0:
2398
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2399
- ; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
2400
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2401
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
2402
- %gep = getelementptr double , ptr %ptr , i64 511
2403
- %unused = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64 (ptr %ptr , double %data )
2404
- ret void
2405
- }
2406
-
2407
- define void @flat_atomic_fmin_f64_intrinsic_noret__negoffset (ptr %ptr , double %data ) #1 {
2408
- ; GFX90A-LABEL: flat_atomic_fmin_f64_intrinsic_noret__negoffset:
2409
- ; GFX90A: ; %bb.0:
2410
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2411
- ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2412
- ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2413
- ; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
2414
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2415
- ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2416
- ;
2417
- ; GFX940-LABEL: flat_atomic_fmin_f64_intrinsic_noret__negoffset:
2418
- ; GFX940: ; %bb.0:
2419
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2420
- ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2421
- ; GFX940-NEXT: s_nop 1
2422
- ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2423
- ; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
2424
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2425
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
2426
- %gep = getelementptr double , ptr %ptr , i64 -511
2427
- %unused = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64 (ptr %gep , double %data )
2428
- ret void
2429
- }
2430
-
2431
- define double @flat_atomic_fmax_f64_intrinsic_rtn__posoffset (ptr %ptr , double %data ) #1 {
2432
- ; GFX90A-LABEL: flat_atomic_fmax_f64_intrinsic_rtn__posoffset:
2433
- ; GFX90A: ; %bb.0:
2434
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2435
- ; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] glc
2436
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2437
- ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2438
- ;
2439
- ; GFX940-LABEL: flat_atomic_fmax_f64_intrinsic_rtn__posoffset:
2440
- ; GFX940: ; %bb.0:
2441
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2442
- ; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] sc0
2443
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2444
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
2445
- %gep = getelementptr double , ptr %ptr , i64 511
2446
- %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64 (ptr %ptr , double %data )
2447
- ret double %ret
2448
- }
2449
-
2450
- define double @flat_atomic_fmax_f64_intrinsic_rtn__negoffset (ptr %ptr , double %data ) #1 {
2451
- ; GFX90A-LABEL: flat_atomic_fmax_f64_intrinsic_rtn__negoffset:
2452
- ; GFX90A: ; %bb.0:
2453
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2454
- ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2455
- ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2456
- ; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] glc
2457
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2458
- ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2459
- ;
2460
- ; GFX940-LABEL: flat_atomic_fmax_f64_intrinsic_rtn__negoffset:
2461
- ; GFX940: ; %bb.0:
2462
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2463
- ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2464
- ; GFX940-NEXT: s_nop 1
2465
- ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2466
- ; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] sc0
2467
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2468
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
2469
- %gep = getelementptr double , ptr %ptr , i64 -511
2470
- %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64 (ptr %gep , double %data )
2471
- ret double %ret
2472
- }
2473
-
2474
- define void @flat_atomic_fmax_f64_intrinsic_noret__posoffset (ptr %ptr , double %data ) #1 {
2475
- ; GFX90A-LABEL: flat_atomic_fmax_f64_intrinsic_noret__posoffset:
2476
- ; GFX90A: ; %bb.0:
2477
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2478
- ; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
2479
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2480
- ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2481
- ;
2482
- ; GFX940-LABEL: flat_atomic_fmax_f64_intrinsic_noret__posoffset:
2483
- ; GFX940: ; %bb.0:
2484
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2485
- ; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
2486
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2487
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
2488
- %gep = getelementptr double , ptr %ptr , i64 511
2489
- %unused = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64 (ptr %ptr , double %data )
2490
- ret void
2491
- }
2492
-
2493
- define void @flat_atomic_fmax_f64_intrinsic_noret__negoffset (ptr %ptr , double %data ) #1 {
2494
- ; GFX90A-LABEL: flat_atomic_fmax_f64_intrinsic_noret__negoffset:
2495
- ; GFX90A: ; %bb.0:
2496
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2497
- ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2498
- ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2499
- ; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
2500
- ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2501
- ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2502
- ;
2503
- ; GFX940-LABEL: flat_atomic_fmax_f64_intrinsic_noret__negoffset:
2504
- ; GFX940: ; %bb.0:
2505
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2506
- ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
2507
- ; GFX940-NEXT: s_nop 1
2508
- ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2509
- ; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
2510
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2511
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
2512
- %gep = getelementptr double , ptr %ptr , i64 -511
2513
- %unused = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64 (ptr %gep , double %data )
2514
- ret void
2515
- }
2516
-
2517
2259
attributes #0 = { "denormal-fp-math" ="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics" ="true" }
2518
2260
attributes #1 = { "amdgpu-unsafe-fp-atomics" ="true" }
2519
2261
attributes #2 = { "denormal-fp-math" ="ieee,ieee" "amdgpu-unsafe-fp-atomics" ="true" }
0 commit comments