@@ -416,298 +416,4 @@ define <2 x i16> @local_atomic_fadd_v2bf16_rtn(ptr addrspace(3) %ptr, <2 x i16>
416
416
ret <2 x i16 > %ret
417
417
}
418
418
419
- define float @flat_atomic_fadd_f32_intrinsic_ret__posoffset (ptr %ptr , float %data ) {
420
- ; GFX940-LABEL: flat_atomic_fadd_f32_intrinsic_ret__posoffset:
421
- ; GFX940: ; %bb.0:
422
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
423
- ; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:4092 sc0
424
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
425
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
426
- ;
427
- ; GFX12-LABEL: flat_atomic_fadd_f32_intrinsic_ret__posoffset:
428
- ; GFX12: ; %bb.0:
429
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
430
- ; GFX12-NEXT: s_wait_expcnt 0x0
431
- ; GFX12-NEXT: s_wait_samplecnt 0x0
432
- ; GFX12-NEXT: s_wait_bvhcnt 0x0
433
- ; GFX12-NEXT: s_wait_kmcnt 0x0
434
- ; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:4092 th:TH_ATOMIC_RETURN
435
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
436
- ; GFX12-NEXT: s_setpc_b64 s[30:31]
437
- %gep = getelementptr float , ptr %ptr , i64 1023
438
- %result = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32 (ptr %gep , float %data )
439
- ret float %result
440
- }
441
-
442
- define float @flat_atomic_fadd_f32_intrinsic_ret__negoffset (ptr %ptr , float %data ) {
443
- ; GFX940-LABEL: flat_atomic_fadd_f32_intrinsic_ret__negoffset:
444
- ; GFX940: ; %bb.0:
445
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
446
- ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffffc00, v0
447
- ; GFX940-NEXT: s_nop 1
448
- ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
449
- ; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0
450
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
451
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
452
- ;
453
- ; GFX12-LABEL: flat_atomic_fadd_f32_intrinsic_ret__negoffset:
454
- ; GFX12: ; %bb.0:
455
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
456
- ; GFX12-NEXT: s_wait_expcnt 0x0
457
- ; GFX12-NEXT: s_wait_samplecnt 0x0
458
- ; GFX12-NEXT: s_wait_bvhcnt 0x0
459
- ; GFX12-NEXT: s_wait_kmcnt 0x0
460
- ; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:-1024 th:TH_ATOMIC_RETURN
461
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
462
- ; GFX12-NEXT: s_setpc_b64 s[30:31]
463
- %gep = getelementptr float , ptr %ptr , i64 -256
464
- %result = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32 (ptr %gep , float %data )
465
- ret float %result
466
- }
467
-
468
- define void @flat_atomic_fadd_f32_intrinsic_noret__posoffset (ptr %ptr , float %data ) {
469
- ; GFX940-LABEL: flat_atomic_fadd_f32_intrinsic_noret__posoffset:
470
- ; GFX940: ; %bb.0:
471
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
472
- ; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:4092
473
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
474
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
475
- ;
476
- ; GFX12-LABEL: flat_atomic_fadd_f32_intrinsic_noret__posoffset:
477
- ; GFX12: ; %bb.0:
478
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
479
- ; GFX12-NEXT: s_wait_expcnt 0x0
480
- ; GFX12-NEXT: s_wait_samplecnt 0x0
481
- ; GFX12-NEXT: s_wait_bvhcnt 0x0
482
- ; GFX12-NEXT: s_wait_kmcnt 0x0
483
- ; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:4092
484
- ; GFX12-NEXT: s_wait_dscnt 0x0
485
- ; GFX12-NEXT: s_setpc_b64 s[30:31]
486
- %gep = getelementptr float , ptr %ptr , i64 1023
487
- %unused = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32 (ptr %gep , float %data )
488
- ret void
489
- }
490
-
491
- define void @flat_atomic_fadd_f32_intrinsic_noret__negoffset (ptr %ptr , float %data ) {
492
- ; GFX940-LABEL: flat_atomic_fadd_f32_intrinsic_noret__negoffset:
493
- ; GFX940: ; %bb.0:
494
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
495
- ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffffc00, v0
496
- ; GFX940-NEXT: s_nop 1
497
- ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
498
- ; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2
499
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
500
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
501
- ;
502
- ; GFX12-LABEL: flat_atomic_fadd_f32_intrinsic_noret__negoffset:
503
- ; GFX12: ; %bb.0:
504
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
505
- ; GFX12-NEXT: s_wait_expcnt 0x0
506
- ; GFX12-NEXT: s_wait_samplecnt 0x0
507
- ; GFX12-NEXT: s_wait_bvhcnt 0x0
508
- ; GFX12-NEXT: s_wait_kmcnt 0x0
509
- ; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:-1024
510
- ; GFX12-NEXT: s_wait_dscnt 0x0
511
- ; GFX12-NEXT: s_setpc_b64 s[30:31]
512
- %gep = getelementptr float , ptr %ptr , i64 -256
513
- %unused = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32 (ptr %gep , float %data )
514
- ret void
515
- }
516
-
517
- define <2 x half > @flat_atomic_fadd_v2f16_intrinsic_ret__posoffset (ptr %ptr , <2 x half > %data ) {
518
- ; GFX940-LABEL: flat_atomic_fadd_v2f16_intrinsic_ret__posoffset:
519
- ; GFX940: ; %bb.0:
520
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521
- ; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:4092 sc0
522
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
523
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
524
- ;
525
- ; GFX12-LABEL: flat_atomic_fadd_v2f16_intrinsic_ret__posoffset:
526
- ; GFX12: ; %bb.0:
527
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
528
- ; GFX12-NEXT: s_wait_expcnt 0x0
529
- ; GFX12-NEXT: s_wait_samplecnt 0x0
530
- ; GFX12-NEXT: s_wait_bvhcnt 0x0
531
- ; GFX12-NEXT: s_wait_kmcnt 0x0
532
- ; GFX12-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:4092 th:TH_ATOMIC_RETURN
533
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
534
- ; GFX12-NEXT: s_setpc_b64 s[30:31]
535
- %gep = getelementptr <2 x half >, ptr %ptr , i64 1023
536
- %result = call <2 x half > @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16 (ptr %gep , <2 x half > %data )
537
- ret <2 x half > %result
538
- }
539
-
540
- define <2 x half > @flat_atomic_fadd_v2f16_intrinsic_ret__negoffset (ptr %ptr , <2 x half > %data ) {
541
- ; GFX940-LABEL: flat_atomic_fadd_v2f16_intrinsic_ret__negoffset:
542
- ; GFX940: ; %bb.0:
543
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
544
- ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffffc00, v0
545
- ; GFX940-NEXT: s_nop 1
546
- ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
547
- ; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0
548
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
549
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
550
- ;
551
- ; GFX12-LABEL: flat_atomic_fadd_v2f16_intrinsic_ret__negoffset:
552
- ; GFX12: ; %bb.0:
553
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
554
- ; GFX12-NEXT: s_wait_expcnt 0x0
555
- ; GFX12-NEXT: s_wait_samplecnt 0x0
556
- ; GFX12-NEXT: s_wait_bvhcnt 0x0
557
- ; GFX12-NEXT: s_wait_kmcnt 0x0
558
- ; GFX12-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:-1024 th:TH_ATOMIC_RETURN
559
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
560
- ; GFX12-NEXT: s_setpc_b64 s[30:31]
561
- %gep = getelementptr <2 x half >, ptr %ptr , i64 -256
562
- %result = call <2 x half > @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16 (ptr %gep , <2 x half > %data )
563
- ret <2 x half > %result
564
- }
565
-
566
- define void @flat_atomic_fadd_v2f16_intrinsic_noret__posoffset (ptr %ptr , <2 x half > %data ) {
567
- ; GFX940-LABEL: flat_atomic_fadd_v2f16_intrinsic_noret__posoffset:
568
- ; GFX940: ; %bb.0:
569
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
570
- ; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:4092
571
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
572
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
573
- ;
574
- ; GFX12-LABEL: flat_atomic_fadd_v2f16_intrinsic_noret__posoffset:
575
- ; GFX12: ; %bb.0:
576
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
577
- ; GFX12-NEXT: s_wait_expcnt 0x0
578
- ; GFX12-NEXT: s_wait_samplecnt 0x0
579
- ; GFX12-NEXT: s_wait_bvhcnt 0x0
580
- ; GFX12-NEXT: s_wait_kmcnt 0x0
581
- ; GFX12-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:4092
582
- ; GFX12-NEXT: s_wait_dscnt 0x0
583
- ; GFX12-NEXT: s_setpc_b64 s[30:31]
584
- %gep = getelementptr <2 x half >, ptr %ptr , i64 1023
585
- %unused = call <2 x half > @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16 (ptr %gep , <2 x half > %data )
586
- ret void
587
- }
588
-
589
- define void @flat_atomic_fadd_v2f16_intrinsic_noret__negoffset (ptr %ptr , <2 x half > %data ) {
590
- ; GFX940-LABEL: flat_atomic_fadd_v2f16_intrinsic_noret__negoffset:
591
- ; GFX940: ; %bb.0:
592
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
593
- ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffffc00, v0
594
- ; GFX940-NEXT: s_nop 1
595
- ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
596
- ; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2
597
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
598
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
599
- ;
600
- ; GFX12-LABEL: flat_atomic_fadd_v2f16_intrinsic_noret__negoffset:
601
- ; GFX12: ; %bb.0:
602
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
603
- ; GFX12-NEXT: s_wait_expcnt 0x0
604
- ; GFX12-NEXT: s_wait_samplecnt 0x0
605
- ; GFX12-NEXT: s_wait_bvhcnt 0x0
606
- ; GFX12-NEXT: s_wait_kmcnt 0x0
607
- ; GFX12-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:-1024
608
- ; GFX12-NEXT: s_wait_dscnt 0x0
609
- ; GFX12-NEXT: s_setpc_b64 s[30:31]
610
- %gep = getelementptr <2 x half >, ptr %ptr , i64 -256
611
- %unused = call <2 x half > @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16 (ptr %gep , <2 x half > %data )
612
- ret void
613
- }
614
-
615
- define <2 x i16 > @flat_atomic_fadd_v2bf16_intrinsic_ret__posoffset (ptr %ptr , <2 x i16 > %data ) {
616
- ; GFX940-LABEL: flat_atomic_fadd_v2bf16_intrinsic_ret__posoffset:
617
- ; GFX940: ; %bb.0:
618
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
619
- ; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:4092 sc0
620
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
621
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
622
- ;
623
- ; GFX12-LABEL: flat_atomic_fadd_v2bf16_intrinsic_ret__posoffset:
624
- ; GFX12: ; %bb.0:
625
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
626
- ; GFX12-NEXT: s_wait_expcnt 0x0
627
- ; GFX12-NEXT: s_wait_samplecnt 0x0
628
- ; GFX12-NEXT: s_wait_bvhcnt 0x0
629
- ; GFX12-NEXT: s_wait_kmcnt 0x0
630
- ; GFX12-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:4092 th:TH_ATOMIC_RETURN
631
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
632
- ; GFX12-NEXT: s_setpc_b64 s[30:31]
633
- %gep = getelementptr <2 x i16 >, ptr %ptr , i64 1023
634
- %result = call <2 x i16 > @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0.v2bf16 (ptr %gep , <2 x i16 > %data )
635
- ret <2 x i16 > %result
636
- }
637
-
638
- define <2 x i16 > @flat_atomic_fadd_v2bf16_intrinsic_ret__negoffset (ptr %ptr , <2 x i16 > %data ) {
639
- ; GFX940-LABEL: flat_atomic_fadd_v2bf16_intrinsic_ret__negoffset:
640
- ; GFX940: ; %bb.0:
641
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
642
- ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffffc00, v0
643
- ; GFX940-NEXT: s_nop 1
644
- ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
645
- ; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0
646
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
647
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
648
- ;
649
- ; GFX12-LABEL: flat_atomic_fadd_v2bf16_intrinsic_ret__negoffset:
650
- ; GFX12: ; %bb.0:
651
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
652
- ; GFX12-NEXT: s_wait_expcnt 0x0
653
- ; GFX12-NEXT: s_wait_samplecnt 0x0
654
- ; GFX12-NEXT: s_wait_bvhcnt 0x0
655
- ; GFX12-NEXT: s_wait_kmcnt 0x0
656
- ; GFX12-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:-1024 th:TH_ATOMIC_RETURN
657
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
658
- ; GFX12-NEXT: s_setpc_b64 s[30:31]
659
- %gep = getelementptr <2 x i16 >, ptr %ptr , i64 -256
660
- %result = call <2 x i16 > @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0.v2bf16 (ptr %gep , <2 x i16 > %data )
661
- ret <2 x i16 > %result
662
- }
663
-
664
- define void @flat_atomic_fadd_v2bf16_intrinsic_noret__posoffset (ptr %ptr , <2 x i16 > %data ) {
665
- ; GFX940-LABEL: flat_atomic_fadd_v2bf16_intrinsic_noret__posoffset:
666
- ; GFX940: ; %bb.0:
667
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
668
- ; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:4092
669
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
670
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
671
- ;
672
- ; GFX12-LABEL: flat_atomic_fadd_v2bf16_intrinsic_noret__posoffset:
673
- ; GFX12: ; %bb.0:
674
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
675
- ; GFX12-NEXT: s_wait_expcnt 0x0
676
- ; GFX12-NEXT: s_wait_samplecnt 0x0
677
- ; GFX12-NEXT: s_wait_bvhcnt 0x0
678
- ; GFX12-NEXT: s_wait_kmcnt 0x0
679
- ; GFX12-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:4092
680
- ; GFX12-NEXT: s_wait_dscnt 0x0
681
- ; GFX12-NEXT: s_setpc_b64 s[30:31]
682
- %gep = getelementptr <2 x i16 >, ptr %ptr , i64 1023
683
- %unused = call <2 x i16 > @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0.v2bf16 (ptr %gep , <2 x i16 > %data )
684
- ret void
685
- }
686
-
687
- define void @flat_atomic_fadd_v2bf16_intrinsic_noret__negoffset (ptr %ptr , <2 x i16 > %data ) {
688
- ; GFX940-LABEL: flat_atomic_fadd_v2bf16_intrinsic_noret__negoffset:
689
- ; GFX940: ; %bb.0:
690
- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
691
- ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffffc00, v0
692
- ; GFX940-NEXT: s_nop 1
693
- ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
694
- ; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2
695
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
696
- ; GFX940-NEXT: s_setpc_b64 s[30:31]
697
- ;
698
- ; GFX12-LABEL: flat_atomic_fadd_v2bf16_intrinsic_noret__negoffset:
699
- ; GFX12: ; %bb.0:
700
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
701
- ; GFX12-NEXT: s_wait_expcnt 0x0
702
- ; GFX12-NEXT: s_wait_samplecnt 0x0
703
- ; GFX12-NEXT: s_wait_bvhcnt 0x0
704
- ; GFX12-NEXT: s_wait_kmcnt 0x0
705
- ; GFX12-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:-1024
706
- ; GFX12-NEXT: s_wait_dscnt 0x0
707
- ; GFX12-NEXT: s_setpc_b64 s[30:31]
708
- %gep = getelementptr <2 x i16 >, ptr %ptr , i64 -256
709
- %unused = call <2 x i16 > @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0.v2bf16 (ptr %gep , <2 x i16 > %data )
710
- ret void
711
- }
712
-
713
419
attributes #0 = { "denormal-fp-math-f32" ="ieee,ieee" }
0 commit comments