Skip to content

Commit acb33a0

Browse files
committed
[RISCV][SLP] Add test coverage for 2^N-1 vector sizes w/FP types
Our cost modeling for FP and integer differs in enough cases that having both is useful for exercising different logic in SLP.
1 parent fa4fbae commit acb33a0

File tree

1 file changed

+108
-2
lines changed

1 file changed

+108
-2
lines changed

llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll

Lines changed: 108 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -433,6 +433,29 @@ define i32 @reduce_add(ptr %src) {
433433
ret i32 %add.1
434434
}
435435

436+
; Negative test: 3-element (2^N-1) fast-math fadd reduction over contiguous
; floats. The CHECK lines below contain no vector instructions — the expected
; SLP output is fully scalar for this FP reduction on RISCV. FP counterpart
; of @reduce_add; kept in sync with it so integer vs FP cost-model differences
; are both exercised.
define float @reduce_fadd(ptr %src) {
437+
; CHECK-LABEL: @reduce_fadd(
438+
; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
439+
; CHECK-NEXT: [[L_SRC_0:%.*]] = load float, ptr [[GEP_SRC_0]], align 4
440+
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 1
441+
; CHECK-NEXT: [[L_SRC_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
442+
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 2
443+
; CHECK-NEXT: [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
444+
; CHECK-NEXT: [[ADD_0:%.*]] = fadd fast float [[L_SRC_0]], [[L_SRC_1]]
445+
; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[L_SRC_2]]
446+
; CHECK-NEXT: ret float [[ADD_1]]
447+
;
448+
%gep.src.0 = getelementptr inbounds float, ptr %src, i32 0
449+
%l.src.0 = load float, ptr %gep.src.0, align 4
450+
%gep.src.1 = getelementptr inbounds float, ptr %src, i32 1
451+
%l.src.1 = load float, ptr %gep.src.1, align 4
452+
%gep.src.2 = getelementptr inbounds float, ptr %src, i32 2
453+
%l.src.2 = load float, ptr %gep.src.2, align 4
454+
455+
; Linear (non-reassociated source order) reduction: (l0 + l1) + l2, fast flags
; allow SLP to treat it as a reduction candidate.
%add.0 = fadd fast float %l.src.0, %l.src.1
456+
%add.1 = fadd fast float %add.0, %l.src.2
457+
ret float %add.1
458+
}
436459

437460
define i32 @reduce_add_after_mul(ptr %src) {
438461
; CHECK-LABEL: @reduce_add_after_mul(
@@ -465,8 +488,8 @@ define i32 @reduce_add_after_mul(ptr %src) {
465488
ret i32 %add.1
466489
}
467490

468-
define i32 @dot_product(ptr %a, ptr %b) {
469-
; CHECK-LABEL: @dot_product(
491+
define i32 @dot_product_i32(ptr %a, ptr %b) {
492+
; CHECK-LABEL: @dot_product_i32(
470493
; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0
471494
; CHECK-NEXT: [[L_A_0:%.*]] = load i32, ptr [[GEP_A_0]], align 4
472495
; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1
@@ -509,6 +532,89 @@ define i32 @dot_product(ptr %a, ptr %b) {
509532
ret i32 %add.1
510533
}
511534

535+
; 3-element (2^N-1) fp32 dot product: a = a[0..2], b = b[0..2],
; returns sum(a[i]*b[i]). The CHECK lines expect partial vectorization:
; lanes 0-1 become a <2 x float> load/fmul pair, lane 2 stays scalar, and the
; final fadds are scalar over the two extracted lanes plus the scalar product.
; FP twin of @dot_product_i32 for exercising the FP side of the cost model.
define float @dot_product_fp32(ptr %a, ptr %b) {
536+
; CHECK-LABEL: @dot_product_fp32(
537+
; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0
538+
; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
539+
; CHECK-NEXT: [[L_A_2:%.*]] = load float, ptr [[GEP_A_2]], align 4
540+
; CHECK-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 0
541+
; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2
542+
; CHECK-NEXT: [[L_B_2:%.*]] = load float, ptr [[GEP_B_2]], align 4
543+
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_A_0]], align 4
544+
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[GEP_B_0]], align 4
545+
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP2]]
546+
; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast float [[L_A_2]], [[L_B_2]]
547+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
548+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
549+
; CHECK-NEXT: [[ADD_0:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
550+
; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]]
551+
; CHECK-NEXT: ret float [[ADD_1]]
552+
;
553+
%gep.a.0 = getelementptr inbounds float, ptr %a, i32 0
554+
%l.a.0 = load float, ptr %gep.a.0, align 4
555+
%gep.a.1 = getelementptr inbounds float, ptr %a, i32 1
556+
%l.a.1 = load float, ptr %gep.a.1, align 4
557+
%gep.a.2 = getelementptr inbounds float, ptr %a, i32 2
558+
%l.a.2 = load float, ptr %gep.a.2, align 4
559+
560+
%gep.b.0 = getelementptr inbounds float, ptr %b, i32 0
561+
%l.b.0 = load float, ptr %gep.b.0, align 4
562+
%gep.b.1 = getelementptr inbounds float, ptr %b, i32 1
563+
%l.b.1 = load float, ptr %gep.b.1, align 4
564+
%gep.b.2 = getelementptr inbounds float, ptr %b, i32 2
565+
%l.b.2 = load float, ptr %gep.b.2, align 4
566+
567+
; Per-lane products, all with fast-math flags so reassociation is legal.
%mul.0 = fmul fast float %l.a.0, %l.b.0
568+
%mul.1 = fmul fast float %l.a.1, %l.b.1
569+
%mul.2 = fmul fast float %l.a.2, %l.b.2
570+
571+
%add.0 = fadd fast float %mul.0, %mul.1
572+
%add.1 = fadd fast float %add.0, %mul.2
573+
ret float %add.1
574+
}
575+
576+
; fp64 variant of the 3-element dot product. CHECK lines expect the same
; partial vectorization shape as the fp32 version: <2 x double> for lanes
; 0-1, scalar lane 2, scalar fadd reduction tail.
; NOTE(review): the double loads use `align 4`, below the natural 8-byte
; alignment — looks carried over from the fp32 test; confirm this is the
; intended (under-aligned) scenario.
define double @dot_product_fp64(ptr %a, ptr %b) {
577+
; CHECK-LABEL: @dot_product_fp64(
578+
; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0
579+
; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds double, ptr [[A]], i32 2
580+
; CHECK-NEXT: [[L_A_2:%.*]] = load double, ptr [[GEP_A_2]], align 4
581+
; CHECK-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i32 0
582+
; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds double, ptr [[B]], i32 2
583+
; CHECK-NEXT: [[L_B_2:%.*]] = load double, ptr [[GEP_B_2]], align 4
584+
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[GEP_A_0]], align 4
585+
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[GEP_B_0]], align 4
586+
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP2]]
587+
; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast double [[L_A_2]], [[L_B_2]]
588+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
589+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
590+
; CHECK-NEXT: [[ADD_0:%.*]] = fadd fast double [[TMP4]], [[TMP5]]
591+
; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast double [[ADD_0]], [[MUL_2]]
592+
; CHECK-NEXT: ret double [[ADD_1]]
593+
;
594+
%gep.a.0 = getelementptr inbounds double, ptr %a, i32 0
595+
%l.a.0 = load double, ptr %gep.a.0, align 4
596+
%gep.a.1 = getelementptr inbounds double, ptr %a, i32 1
597+
%l.a.1 = load double, ptr %gep.a.1, align 4
598+
%gep.a.2 = getelementptr inbounds double, ptr %a, i32 2
599+
%l.a.2 = load double, ptr %gep.a.2, align 4
600+
601+
%gep.b.0 = getelementptr inbounds double, ptr %b, i32 0
602+
%l.b.0 = load double, ptr %gep.b.0, align 4
603+
%gep.b.1 = getelementptr inbounds double, ptr %b, i32 1
604+
%l.b.1 = load double, ptr %gep.b.1, align 4
605+
%gep.b.2 = getelementptr inbounds double, ptr %b, i32 2
606+
%l.b.2 = load double, ptr %gep.b.2, align 4
607+
608+
%mul.0 = fmul fast double %l.a.0, %l.b.0
609+
%mul.1 = fmul fast double %l.a.1, %l.b.1
610+
%mul.2 = fmul fast double %l.a.2, %l.b.2
611+
612+
%add.0 = fadd fast double %mul.0, %mul.1
613+
%add.1 = fadd fast double %add.0, %mul.2
614+
ret double %add.1
615+
}
616+
617+
512618
declare float @llvm.fmuladd.f32(float, float, float)
513619

514620
declare double @llvm.fmuladd.f64(double, double, double)

0 commit comments

Comments
 (0)