Skip to content

Commit 641aaf7

Browse files
SLP cannot vectorize frem calls on AArch64.
SLP needs updated costs for the case where a vector library function is available for the given VF and type.
1 parent cfcc272 commit 641aaf7

File tree

1 file changed

+71
-0
lines changed

1 file changed

+71
-0
lines changed
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt < %s -S -mtriple=aarch64 -vector-library=ArmPL -passes=slp-vectorizer | FileCheck %s
3+
4+
; Global that every load and store in the test functions of this file addresses
; (either directly or through a constant getelementptr off @a).
@a = common global ptr null, align 8
5+
6+
; Two scalar `frem double` operations on adjacent elements of @a (indices 0
; and 1), with each result stored back to the element it was computed from.
; The autogenerated assertions below expect both scalar frem instructions to
; still be present after the pass, i.e. this function is left unvectorized.
define void @frem_v2double() {
7+
; CHECK-LABEL: define void @frem_v2double() {
8+
; CHECK-NEXT: entry:
9+
; CHECK-NEXT: [[A0:%.*]] = load double, ptr @a, align 8
10+
; CHECK-NEXT: [[A1:%.*]] = load double, ptr getelementptr inbounds (double, ptr @a, i64 1), align 8
11+
; CHECK-NEXT: [[B0:%.*]] = load double, ptr @a, align 8
12+
; CHECK-NEXT: [[B1:%.*]] = load double, ptr getelementptr inbounds (double, ptr @a, i64 1), align 8
13+
; CHECK-NEXT: [[R0:%.*]] = frem double [[A0]], [[B0]]
14+
; CHECK-NEXT: [[R1:%.*]] = frem double [[A1]], [[B1]]
15+
; CHECK-NEXT: store double [[R0]], ptr @a, align 8
16+
; CHECK-NEXT: store double [[R1]], ptr getelementptr inbounds (double, ptr @a, i64 1), align 8
17+
; CHECK-NEXT: ret void
18+
;
19+
entry:
20+
; Input IR. The only textual difference from the expected output above is that
; the zero-index getelementptr is expected to be printed as plain @a.
; Both operands of each frem come from the same address: a0/b0 from element 0,
; a1/b1 from element 1.
; NOTE(review): @a is declared as a single `ptr` global, yet element 1 of
; double is accessed here - presumably irrelevant to what the test exercises;
; confirm.
  %a0 = load double, ptr getelementptr inbounds (double, ptr @a, i64 0), align 8
21+
  %a1 = load double, ptr getelementptr inbounds (double, ptr @a, i64 1), align 8
22+
  %b0 = load double, ptr getelementptr inbounds (double, ptr @a, i64 0), align 8
23+
  %b1 = load double, ptr getelementptr inbounds (double, ptr @a, i64 1), align 8
24+
; The two independent, isomorphic frem operations the test is about.
  %r0 = frem double %a0, %b0
25+
  %r1 = frem double %a1, %b1
26+
; Results go back to consecutive elements 0 and 1 of @a.
  store double %r0, ptr getelementptr inbounds (double, ptr @a, i64 0), align 8
27+
  store double %r1, ptr getelementptr inbounds (double, ptr @a, i64 1), align 8
28+
  ret void
29+
}
30+
31+
; Four scalar `frem float` operations on adjacent elements of @a (indices 0
; through 3), with each result stored back to the element it was computed from.
; The autogenerated assertions below expect all four scalar frem instructions
; to still be present after the pass, i.e. this function is left unvectorized.
define void @frem_v4float() {
32+
; CHECK-LABEL: define void @frem_v4float() {
33+
; CHECK-NEXT: entry:
34+
; CHECK-NEXT: [[A0:%.*]] = load float, ptr @a, align 8
35+
; CHECK-NEXT: [[A1:%.*]] = load float, ptr getelementptr inbounds (float, ptr @a, i64 1), align 8
36+
; CHECK-NEXT: [[A2:%.*]] = load float, ptr getelementptr inbounds (float, ptr @a, i64 2), align 8
37+
; CHECK-NEXT: [[A3:%.*]] = load float, ptr getelementptr inbounds (float, ptr @a, i64 3), align 8
38+
; CHECK-NEXT: [[B0:%.*]] = load float, ptr @a, align 8
39+
; CHECK-NEXT: [[B1:%.*]] = load float, ptr getelementptr inbounds (float, ptr @a, i64 1), align 8
40+
; CHECK-NEXT: [[B2:%.*]] = load float, ptr getelementptr inbounds (float, ptr @a, i64 2), align 8
41+
; CHECK-NEXT: [[B3:%.*]] = load float, ptr getelementptr inbounds (float, ptr @a, i64 3), align 8
42+
; CHECK-NEXT: [[R0:%.*]] = frem float [[A0]], [[B0]]
43+
; CHECK-NEXT: [[R1:%.*]] = frem float [[A1]], [[B1]]
44+
; CHECK-NEXT: [[R2:%.*]] = frem float [[A2]], [[B2]]
45+
; CHECK-NEXT: [[R3:%.*]] = frem float [[A3]], [[B3]]
46+
; CHECK-NEXT: store float [[R0]], ptr @a, align 8
47+
; CHECK-NEXT: store float [[R1]], ptr getelementptr inbounds (float, ptr @a, i64 1), align 8
48+
; CHECK-NEXT: store float [[R2]], ptr getelementptr inbounds (float, ptr @a, i64 2), align 8
49+
; CHECK-NEXT: store float [[R3]], ptr getelementptr inbounds (float, ptr @a, i64 3), align 8
50+
; CHECK-NEXT: ret void
51+
;
52+
entry:
53+
; Input IR. The only textual difference from the expected output above is that
; the zero-index getelementptr is expected to be printed as plain @a.
; Both operands of each frem come from the same address: aN/bN are both loaded
; from element N of @a.
; NOTE(review): every access claims `align 8`, but float elements 1 and 3 sit
; at byte offsets 4 and 12 from the 8-aligned @a, so that alignment looks
; overstated - confirm whether this is intentional for the test.
  %a0 = load float, ptr getelementptr inbounds (float, ptr @a, i64 0), align 8
54+
  %a1 = load float, ptr getelementptr inbounds (float, ptr @a, i64 1), align 8
55+
  %a2 = load float, ptr getelementptr inbounds (float, ptr @a, i64 2), align 8
56+
  %a3 = load float, ptr getelementptr inbounds (float, ptr @a, i64 3), align 8
57+
  %b0 = load float, ptr getelementptr inbounds (float, ptr @a, i64 0), align 8
58+
  %b1 = load float, ptr getelementptr inbounds (float, ptr @a, i64 1), align 8
59+
  %b2 = load float, ptr getelementptr inbounds (float, ptr @a, i64 2), align 8
60+
  %b3 = load float, ptr getelementptr inbounds (float, ptr @a, i64 3), align 8
61+
; The four independent, isomorphic frem operations the test is about.
  %r0 = frem float %a0, %b0
62+
  %r1 = frem float %a1, %b1
63+
  %r2 = frem float %a2, %b2
64+
  %r3 = frem float %a3, %b3
65+
; Results go back to consecutive elements 0 through 3 of @a.
  store float %r0, ptr getelementptr inbounds (float, ptr @a, i64 0), align 8
66+
  store float %r1, ptr getelementptr inbounds (float, ptr @a, i64 1), align 8
67+
  store float %r2, ptr getelementptr inbounds (float, ptr @a, i64 2), align 8
68+
  store float %r3, ptr getelementptr inbounds (float, ptr @a, i64 3), align 8
69+
  ret void
70+
}
71+

0 commit comments

Comments
 (0)