@@ -433,6 +433,29 @@ define i32 @reduce_add(ptr %src) {
433
433
ret i32 %add.1
434
434
}
435
435
436
+ define float @reduce_fadd (ptr %src ) { ; Test input: sums three consecutive floats read from %src with a chain of two `fadd fast`.
437
+ ; CHECK-LABEL: @reduce_fadd(
438
+ ; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
439
+ ; CHECK-NEXT: [[L_SRC_0:%.*]] = load float, ptr [[GEP_SRC_0]], align 4
440
+ ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 1
441
+ ; CHECK-NEXT: [[L_SRC_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
442
+ ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 2
443
+ ; CHECK-NEXT: [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
444
+ ; CHECK-NEXT: [[ADD_0:%.*]] = fadd fast float [[L_SRC_0]], [[L_SRC_1]]
445
+ ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[L_SRC_2]]
446
+ ; CHECK-NEXT: ret float [[ADD_1]]
447
+ ;
448
+ %gep.src.0 = getelementptr inbounds float , ptr %src , i32 0 ; address of element 0
449
+ %l.src.0 = load float , ptr %gep.src.0 , align 4
450
+ %gep.src.1 = getelementptr inbounds float , ptr %src , i32 1 ; address of element 1
451
+ %l.src.1 = load float , ptr %gep.src.1 , align 4
452
+ %gep.src.2 = getelementptr inbounds float , ptr %src , i32 2 ; address of element 2
453
+ %l.src.2 = load float , ptr %gep.src.2 , align 4
454
+ ; Reduction chain over the three loaded values. The CHECK lines above expect the same scalar loads and fadds in the output (no vector instructions).
455
+ %add.0 = fadd fast float %l.src.0 , %l.src.1 ; element 0 + element 1
456
+ %add.1 = fadd fast float %add.0 , %l.src.2 ; ... + element 2
457
+ ret float %add.1
458
+ }
436
459
437
460
define i32 @reduce_add_after_mul (ptr %src ) {
438
461
; CHECK-LABEL: @reduce_add_after_mul(
@@ -465,8 +488,8 @@ define i32 @reduce_add_after_mul(ptr %src) {
465
488
ret i32 %add.1
466
489
}
467
490
468
- define i32 @dot_product (ptr %a , ptr %b ) {
469
- ; CHECK-LABEL: @dot_product (
491
+ define i32 @dot_product_i32 (ptr %a , ptr %b ) {
492
+ ; CHECK-LABEL: @dot_product_i32 (
470
493
; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0
471
494
; CHECK-NEXT: [[L_A_0:%.*]] = load i32, ptr [[GEP_A_0]], align 4
472
495
; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1
@@ -509,6 +532,89 @@ define i32 @dot_product(ptr %a, ptr %b) {
509
532
ret i32 %add.1
510
533
}
511
534
535
+ define float @dot_product_fp32 (ptr %a , ptr %b ) { ; Test input: 3-element float dot product of %a and %b, written as scalar loads, `fmul fast` and `fadd fast`.
536
+ ; CHECK-LABEL: @dot_product_fp32(
537
+ ; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0
538
+ ; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
539
+ ; CHECK-NEXT: [[L_A_2:%.*]] = load float, ptr [[GEP_A_2]], align 4
540
+ ; CHECK-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 0
541
+ ; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2
542
+ ; CHECK-NEXT: [[L_B_2:%.*]] = load float, ptr [[GEP_B_2]], align 4
543
+ ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_A_0]], align 4
544
+ ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[GEP_B_0]], align 4
545
+ ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP2]]
546
+ ; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast float [[L_A_2]], [[L_B_2]]
547
+ ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
548
+ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
549
+ ; CHECK-NEXT: [[ADD_0:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
550
+ ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]]
551
+ ; CHECK-NEXT: ret float [[ADD_1]]
552
+ ;
553
+ %gep.a.0 = getelementptr inbounds float , ptr %a , i32 0 ; address of a[0]
554
+ %l.a.0 = load float , ptr %gep.a.0 , align 4
555
+ %gep.a.1 = getelementptr inbounds float , ptr %a , i32 1 ; address of a[1]
556
+ %l.a.1 = load float , ptr %gep.a.1 , align 4
557
+ %gep.a.2 = getelementptr inbounds float , ptr %a , i32 2 ; address of a[2]
558
+ %l.a.2 = load float , ptr %gep.a.2 , align 4
559
+ ; Same three element loads, now from %b.
560
+ %gep.b.0 = getelementptr inbounds float , ptr %b , i32 0 ; address of b[0]
561
+ %l.b.0 = load float , ptr %gep.b.0 , align 4
562
+ %gep.b.1 = getelementptr inbounds float , ptr %b , i32 1 ; address of b[1]
563
+ %l.b.1 = load float , ptr %gep.b.1 , align 4
564
+ %gep.b.2 = getelementptr inbounds float , ptr %b , i32 2 ; address of b[2]
565
+ %l.b.2 = load float , ptr %gep.b.2 , align 4
566
+ ; Element-wise products. The CHECK lines expect mul.0 and mul.1 to become one <2 x float> load/fmul pair, while mul.2 stays scalar.
567
+ %mul.0 = fmul fast float %l.a.0 , %l.b.0
568
+ %mul.1 = fmul fast float %l.a.1 , %l.b.1
569
+ %mul.2 = fmul fast float %l.a.2 , %l.b.2
570
+ ; Sum of the products. The CHECK lines expect the first fadd to take its operands from extractelement of the vector product.
571
+ %add.0 = fadd fast float %mul.0 , %mul.1
572
+ %add.1 = fadd fast float %add.0 , %mul.2
573
+ ret float %add.1
574
+ }
575
+
576
+ define double @dot_product_fp64 (ptr %a , ptr %b ) { ; Test input: 3-element double dot product of %a and %b, written as scalar loads, `fmul fast` and `fadd fast`.
577
+ ; CHECK-LABEL: @dot_product_fp64(
578
+ ; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0
579
+ ; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds double, ptr [[A]], i32 2
580
+ ; CHECK-NEXT: [[L_A_2:%.*]] = load double, ptr [[GEP_A_2]], align 4
581
+ ; CHECK-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i32 0
582
+ ; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds double, ptr [[B]], i32 2
583
+ ; CHECK-NEXT: [[L_B_2:%.*]] = load double, ptr [[GEP_B_2]], align 4
584
+ ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[GEP_A_0]], align 4
585
+ ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[GEP_B_0]], align 4
586
+ ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP2]]
587
+ ; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast double [[L_A_2]], [[L_B_2]]
588
+ ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
589
+ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
590
+ ; CHECK-NEXT: [[ADD_0:%.*]] = fadd fast double [[TMP4]], [[TMP5]]
591
+ ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast double [[ADD_0]], [[MUL_2]]
592
+ ; CHECK-NEXT: ret double [[ADD_1]]
593
+ ;
594
+ %gep.a.0 = getelementptr inbounds double , ptr %a , i32 0 ; address of a[0]
595
+ %l.a.0 = load double , ptr %gep.a.0 , align 4 ; NOTE(review): every double load here uses align 4, same as the fp32 variant - confirm the under-alignment is intended
596
+ %gep.a.1 = getelementptr inbounds double , ptr %a , i32 1 ; address of a[1]
597
+ %l.a.1 = load double , ptr %gep.a.1 , align 4
598
+ %gep.a.2 = getelementptr inbounds double , ptr %a , i32 2 ; address of a[2]
599
+ %l.a.2 = load double , ptr %gep.a.2 , align 4
600
+ ; Same three element loads, now from %b.
601
+ %gep.b.0 = getelementptr inbounds double , ptr %b , i32 0 ; address of b[0]
602
+ %l.b.0 = load double , ptr %gep.b.0 , align 4
603
+ %gep.b.1 = getelementptr inbounds double , ptr %b , i32 1 ; address of b[1]
604
+ %l.b.1 = load double , ptr %gep.b.1 , align 4
605
+ %gep.b.2 = getelementptr inbounds double , ptr %b , i32 2 ; address of b[2]
606
+ %l.b.2 = load double , ptr %gep.b.2 , align 4
607
+ ; Element-wise products. The CHECK lines expect mul.0 and mul.1 to become one <2 x double> load/fmul pair, while mul.2 stays scalar.
608
+ %mul.0 = fmul fast double %l.a.0 , %l.b.0
609
+ %mul.1 = fmul fast double %l.a.1 , %l.b.1
610
+ %mul.2 = fmul fast double %l.a.2 , %l.b.2
611
+ ; Sum of the products. The CHECK lines expect the first fadd to take its operands from extractelement of the vector product.
612
+ %add.0 = fadd fast double %mul.0 , %mul.1
613
+ %add.1 = fadd fast double %add.0 , %mul.2
614
+ ret double %add.1
615
+ }
616
+
617
+
512
618
declare float @llvm.fmuladd.f32 (float , float , float )
513
619
514
620
declare double @llvm.fmuladd.f64 (double , double , double )
0 commit comments