Skip to content

Commit c990136

Browse files
authored
Add additional reverse mode vector tests (rust-lang#640)
1 parent 4460d33 commit c990136

File tree

3 files changed

+196
-0
lines changed

3 files changed

+196
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
; RUN: %opt < %s %loadEnzyme -enzyme -enzyme-preopt=false -mem2reg -simplifycfg -dce -instcombine -S | FileCheck %s
2+
3+
; Function Attrs: nounwind
4+
declare void @__enzyme_autodiff.f64(...)
5+
6+
; Function Attrs: nounwind uwtable
7+
; Primal function under test: bitcasts the two double* arguments to i8* and
; copies %num bytes from %src to %dst with a single llvm.memcpy (align 1 on
; both pointers).
; The argument names %dst, %src and %num reappear in the CHECK line for the
; generated @diffe3memcpy_float signature, so they should not be renamed.
define dso_local void @memcpy_float(double* nocapture %dst, double* nocapture readonly %src, i64 %num) #0 {
8+
entry:
9+
%0 = bitcast double* %dst to i8*
10+
%1 = bitcast double* %src to i8*
11+
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %0, i8* align 1 %1, i64 %num, i1 false)
12+
ret void
13+
}
14+
15+
; Function Attrs: argmemonly nounwind
16+
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
17+
18+
; Function Attrs: nounwind uwtable
19+
; Test driver: asks Enzyme to differentiate @memcpy_float.
; `metadata !"enzyme_width", i64 3` is passed ahead of the arguments, and each
; pointer argument is followed by three extra pointers:
;   %dst -> %dstp1, %dstp2, %dstp3
;   %src -> %srcp1, %srcp2, %srcp3
; %n is forwarded last as the byte count. The CHECK lines in this file expect
; these triples to arrive as [3 x double*] %"dst'" / %"src'" in
; @diffe3memcpy_float.
define dso_local void @dmemcpy_float(double* %dst, double* %dstp1, double* %dstp2, double* %dstp3, double* %src, double* %srcp1, double* %srcp2, double* %srcp3, i64 %n) local_unnamed_addr #0 {
20+
entry:
21+
tail call void (...) @__enzyme_autodiff.f64(void (double*, double*, i64)* nonnull @memcpy_float, metadata !"enzyme_width", i64 3, double* %dst, double* %dstp1, double* %dstp2, double* %dstp3, double* %src, double* %srcp1, double* %srcp2, double* %srcp3, i64 %n) #3
22+
ret void
23+
}
24+
25+
attributes #0 = { nounwind uwtable }
26+
attributes #1 = { argmemonly nounwind }
27+
attributes #2 = { noinline nounwind uwtable }
28+
attributes #3 = { nounwind } ; referenced by the __enzyme_autodiff call site in @dmemcpy_float
29+
30+
; CHECK: define internal void @diffe3memcpy_float(double* nocapture %dst, [3 x double*] %"dst'", double* nocapture readonly %src, [3 x double*] %"src'", i64 %num)
31+
; CHECK-NEXT: entry:
32+
; CHECK-NEXT: %0 = extractvalue [3 x double*] %"dst'", 0
33+
; CHECK-NEXT: %1 = extractvalue [3 x double*] %"dst'", 1
34+
; CHECK-NEXT: %2 = extractvalue [3 x double*] %"dst'", 2
35+
; CHECK-NEXT: %3 = bitcast double* %dst to i8*
36+
; CHECK-NEXT: %4 = extractvalue [3 x double*] %"src'", 0
37+
; CHECK-NEXT: %5 = extractvalue [3 x double*] %"src'", 1
38+
; CHECK-NEXT: %6 = extractvalue [3 x double*] %"src'", 2
39+
; CHECK-NEXT: %7 = bitcast double* %src to i8*
40+
; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %3, i8* align 1 %7, i64 %num, i1 false)
41+
; CHECK-NEXT: %8 = lshr i64 %num, 3
42+
; CHECK-NEXT: %9 = {{(icmp eq i64 %8, 0|icmp ult i64 %num, 8)}}
43+
; CHECK-NEXT: br i1 %9, label %__enzyme_memcpyadd_doubleda1sa1.exit, label %for.body.i
44+
45+
; CHECK: for.body.i: ; preds = %for.body.i, %entry
46+
; CHECK-NEXT: %idx.i = phi i64 [ 0, %entry ], [ %idx.next.i, %for.body.i ]
47+
; CHECK-NEXT: %dst.i.i = getelementptr inbounds double, double* %0, i64 %idx.i
48+
; CHECK-NEXT: %dst.i.l.i = load double, double* %dst.i.i, align 1
49+
; CHECK-NEXT: store double 0.000000e+00, double* %dst.i.i, align 1
50+
; CHECK-NEXT: %src.i.i = getelementptr inbounds double, double* %4, i64 %idx.i
51+
; CHECK-NEXT: %src.i.l.i = load double, double* %src.i.i, align 1
52+
; CHECK-NEXT: %10 = fadd fast double %src.i.l.i, %dst.i.l.i
53+
; CHECK-NEXT: store double %10, double* %src.i.i, align 1
54+
; CHECK-NEXT: %idx.next.i = add nuw i64 %idx.i, 1
55+
; CHECK-NEXT: %11 = icmp eq i64 %8, %idx.next.i
56+
; CHECK-NEXT: br i1 %11, label %__enzyme_memcpyadd_doubleda1sa1.exit, label %for.body.i
57+
58+
; CHECK: __enzyme_memcpyadd_doubleda1sa1.exit: ; preds = %entry, %for.body.i
59+
; CHECK-NEXT: %12 = lshr i64 %num, 3
60+
; CHECK-NEXT: %13 = {{(icmp eq i64 %12, 0|icmp ult i64 %num, 8)}}
61+
; CHECK-NEXT: br i1 %13, label %__enzyme_memcpyadd_doubleda1sa1.exit13, label %for.body.i12
62+
63+
; CHECK: for.body.i12: ; preds = %for.body.i12, %__enzyme_memcpyadd_doubleda1sa1.exit
64+
; CHECK-NEXT: %idx.i6 = phi i64 [ 0, %__enzyme_memcpyadd_doubleda1sa1.exit ], [ %idx.next.i11, %for.body.i12 ]
65+
; CHECK-NEXT: %dst.i.i7 = getelementptr inbounds double, double* %1, i64 %idx.i6
66+
; CHECK-NEXT: %dst.i.l.i8 = load double, double* %dst.i.i7, align 1
67+
; CHECK-NEXT: store double 0.000000e+00, double* %dst.i.i7, align 1
68+
; CHECK-NEXT: %src.i.i9 = getelementptr inbounds double, double* %5, i64 %idx.i6
69+
; CHECK-NEXT: %src.i.l.i10 = load double, double* %src.i.i9, align 1
70+
; CHECK-NEXT: %14 = fadd fast double %src.i.l.i10, %dst.i.l.i8
71+
; CHECK-NEXT: store double %14, double* %src.i.i9, align 1
72+
; CHECK-NEXT: %idx.next.i11 = add nuw i64 %idx.i6, 1
73+
; CHECK-NEXT: %15 = icmp eq i64 %12, %idx.next.i11
74+
; CHECK-NEXT: br i1 %15, label %__enzyme_memcpyadd_doubleda1sa1.exit13, label %for.body.i12
75+
76+
; CHECK: __enzyme_memcpyadd_doubleda1sa1.exit13: ; preds = %__enzyme_memcpyadd_doubleda1sa1.exit, %for.body.i12
77+
; CHECK-NEXT: %16 = lshr i64 %num, 3
78+
; CHECK-NEXT: %17 = {{(icmp eq i64 %16, 0|icmp ult i64 %num, 8)}}
79+
; CHECK-NEXT: br i1 %17, label %__enzyme_memcpyadd_doubleda1sa1.exit21, label %for.body.i20
80+
81+
; CHECK: for.body.i20: ; preds = %for.body.i20, %__enzyme_memcpyadd_doubleda1sa1.exit13
82+
; CHECK-NEXT: %idx.i14 = phi i64 [ 0, %__enzyme_memcpyadd_doubleda1sa1.exit13 ], [ %idx.next.i19, %for.body.i20 ]
83+
; CHECK-NEXT: %dst.i.i15 = getelementptr inbounds double, double* %2, i64 %idx.i14
84+
; CHECK-NEXT: %dst.i.l.i16 = load double, double* %dst.i.i15, align 1
85+
; CHECK-NEXT: store double 0.000000e+00, double* %dst.i.i15, align 1
86+
; CHECK-NEXT: %src.i.i17 = getelementptr inbounds double, double* %6, i64 %idx.i14
87+
; CHECK-NEXT: %src.i.l.i18 = load double, double* %src.i.i17, align 1
88+
; CHECK-NEXT: %18 = fadd fast double %src.i.l.i18, %dst.i.l.i16
89+
; CHECK-NEXT: store double %18, double* %src.i.i17, align 1
90+
; CHECK-NEXT: %idx.next.i19 = add nuw i64 %idx.i14, 1
91+
; CHECK-NEXT: %19 = icmp eq i64 %16, %idx.next.i19
92+
; CHECK-NEXT: br i1 %19, label %__enzyme_memcpyadd_doubleda1sa1.exit21, label %for.body.i20
93+
94+
; CHECK: __enzyme_memcpyadd_doubleda1sa1.exit21: ; preds = %__enzyme_memcpyadd_doubleda1sa1.exit13, %for.body.i20
95+
; CHECK-NEXT: ret void
96+
; CHECK-NEXT: }
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
; RUN: %opt < %s %loadEnzyme -enzyme -enzyme-preopt=false -mem2reg -simplifycfg -dce -S | FileCheck %s
2+
; Function Attrs: nounwind
3+
declare void @__enzyme_autodiff.f64(...)
4+
5+
; Function Attrs: nounwind uwtable
6+
; Primal function under test: a single llvm.memcpy of %num bytes from %src to
; %dst (both i8*, align 8). The call carries !tbaa !17 and !tbaa.struct !19,
; whose nodes are defined further down in this file and bottom out in the
; "long" type node (!9).
; NOTE(review): presumably this type info is what makes Enzyme emit plain
; memcpys on the shadow pointers (see the CHECK lines) instead of an
; accumulate loop — confirm.
define dso_local void @memcpy_ptr(i8* nocapture %dst, i8* nocapture readonly %src, i64 %num) {
7+
entry:
8+
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 %num, i1 false), !tbaa !17, !tbaa.struct !19
9+
ret void
10+
}
11+
12+
; Function Attrs: argmemonly nounwind
13+
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #0
14+
15+
; Function Attrs: nounwind uwtable
16+
; Test driver: asks Enzyme to differentiate @memcpy_ptr with
; `metadata !"enzyme_width", i64 3`. Each pointer group is introduced by an
; explicit `metadata !"enzyme_dup"` marker and consists of one pointer followed
; by three more (%dst + %dstp1..3, %src + %dsrcp1..3); %n is passed last.
; The CHECK lines expect the triples as [3 x i8*] %"dst'" / %"src'" in
; @diffe3memcpy_ptr.
define dso_local void @dmemcpy_ptr(i8* %dst, i8* %dstp1, i8* %dstp2, i8* %dstp3, i8* %src, i8* %dsrcp1, i8* %dsrcp2, i8* %dsrcp3, i64 %n) {
17+
entry:
18+
tail call void (...) @__enzyme_autodiff.f64(void (i8*, i8*, i64)* nonnull @memcpy_ptr, metadata !"enzyme_width", i64 3, metadata !"enzyme_dup", i8* %dst, i8* %dstp1, i8* %dstp2, i8* %dstp3, metadata !"enzyme_dup", i8* %src, i8* %dsrcp1, i8* %dsrcp2, i8* %dsrcp3, i64 %n)
19+
ret void
20+
}
21+
22+
attributes #0 = { argmemonly nounwind }
23+
24+
!17 = !{!18, !18, i64 0, i64 32}
25+
!18 = !{!4, i64 32, !"_ZTSSt5arrayIlLm4EE", !9, i64 0, i64 32}
26+
27+
!19 = !{i64 0, i64 32, !20}
28+
!20 = !{!9, !9, i64 0, i64 32}
29+
!9 = !{!4, i64 8, !"long"}
30+
!4 = !{!5, i64 1, !"omnipotent char"}
31+
!5 = !{!"Simple C++ TBAA"}
32+
33+
34+
; CHECK: define internal void @diffe3memcpy_ptr(i8* nocapture %dst, [3 x i8*] %"dst'", i8* nocapture readonly %src, [3 x i8*] %"src'", i64 %num)
35+
; CHECK-NEXT: entry:
36+
; CHECK-NEXT: %0 = extractvalue [3 x i8*] %"dst'", 0
37+
; CHECK-NEXT: %1 = extractvalue [3 x i8*] %"src'", 0
38+
; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %0, i8* align 8 %1, i64 %num, i1 false)
39+
; CHECK-NEXT: %2 = extractvalue [3 x i8*] %"dst'", 1
40+
; CHECK-NEXT: %3 = extractvalue [3 x i8*] %"src'", 1
41+
; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %2, i8* align 8 %3, i64 %num, i1 false)
42+
; CHECK-NEXT: %4 = extractvalue [3 x i8*] %"dst'", 2
43+
; CHECK-NEXT: %5 = extractvalue [3 x i8*] %"src'", 2
44+
; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %4, i8* align 8 %5, i64 %num, i1 false)
45+
; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 %num, i1 false)
46+
; CHECK-NEXT: ret void
47+
; CHECK-NEXT: }
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
2+
; RUN: %opt < %s %loadEnzyme -enzyme -enzyme-preopt=false -mem2reg -simplifycfg -dce -S | FileCheck %s
3+
4+
; Function Attrs: nounwind
5+
declare void @__enzyme_autodiff.f64(...)
6+
7+
; Function Attrs: nounwind uwtable
8+
; Primal function under test: bitcasts the two double** arguments to i8* and
; copies %num bytes from %src to %dst with a single llvm.memcpy (align 1 on
; both pointers).
; The CHECK lines in this file are autogenerated from this input (see the NOTE
; at the top of the test), so value names and instruction order should be kept
; as they are.
define dso_local void @memcpy_ptr(double** nocapture %dst, double** nocapture readonly %src, i64 %num) #0 {
9+
entry:
10+
%0 = bitcast double** %dst to i8*
11+
%1 = bitcast double** %src to i8*
12+
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %0, i8* align 1 %1, i64 %num, i1 false)
13+
ret void
14+
}
15+
16+
; Function Attrs: argmemonly nounwind
17+
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
18+
19+
; Function Attrs: nounwind uwtable
20+
; Test driver: asks Enzyme to differentiate @memcpy_ptr with
; `metadata !"enzyme_width", i64 3`. Each double** argument is followed by
; three more pointers (%dst + %dstp1..3, %src + %srcp1..3); %n is passed last.
; The CHECK lines expect the triples as [3 x double**] %"dst'" / %"src'" in
; @diffe3memcpy_ptr. The call site uses attribute group #3, which this file
; defines as { nounwind }.
define dso_local void @dmemcpy_ptr(double** %dst, double** %dstp1, double** %dstp2, double** %dstp3, double** %src, double** %srcp1, double** %srcp2, double** %srcp3, i64 %n) local_unnamed_addr #0 {
21+
entry:
22+
tail call void (...) @__enzyme_autodiff.f64(void (double**, double**, i64)* nonnull @memcpy_ptr, metadata !"enzyme_width", i64 3, double** %dst, double** %dstp1, double** %dstp2, double** %dstp3, double** %src, double** %srcp1, double** %srcp2, double** %srcp3, i64 %n) #3
23+
ret void
24+
}
25+
26+
attributes #0 = { nounwind uwtable }
27+
attributes #1 = { argmemonly nounwind }
28+
attributes #2 = { noinline nounwind uwtable }
29+
attributes #3 = { nounwind }
30+
31+
32+
; CHECK: define internal void @diffe3memcpy_ptr(double** nocapture %dst, [3 x double**] %"dst'", double** nocapture readonly %src, [3 x double**] %"src'", i64 %num)
33+
; CHECK-NEXT: entry:
34+
; CHECK-NEXT: %0 = extractvalue [3 x double**] %"dst'", 0
35+
; CHECK-NEXT: %"'ipc" = bitcast double** %0 to i8*
36+
; CHECK-NEXT: %1 = extractvalue [3 x double**] %"dst'", 1
37+
; CHECK-NEXT: %"'ipc1" = bitcast double** %1 to i8*
38+
; CHECK-NEXT: %2 = extractvalue [3 x double**] %"dst'", 2
39+
; CHECK-NEXT: %"'ipc2" = bitcast double** %2 to i8*
40+
; CHECK-NEXT: %3 = bitcast double** %dst to i8*
41+
; CHECK-NEXT: %4 = extractvalue [3 x double**] %"src'", 0
42+
; CHECK-NEXT: %"'ipc3" = bitcast double** %4 to i8*
43+
; CHECK-NEXT: %5 = extractvalue [3 x double**] %"src'", 1
44+
; CHECK-NEXT: %"'ipc4" = bitcast double** %5 to i8*
45+
; CHECK-NEXT: %6 = extractvalue [3 x double**] %"src'", 2
46+
; CHECK-NEXT: %"'ipc5" = bitcast double** %6 to i8*
47+
; CHECK-NEXT: %7 = bitcast double** %src to i8*
48+
; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %"'ipc", i8* align 1 %"'ipc3", i64 %num, i1 false)
49+
; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %"'ipc1", i8* align 1 %"'ipc4", i64 %num, i1 false)
50+
; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %"'ipc2", i8* align 1 %"'ipc5", i64 %num, i1 false)
51+
; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %3, i8* align 1 %7, i64 %num, i1 false)
52+
; CHECK-NEXT: ret void
53+
; CHECK-NEXT: }

0 commit comments

Comments
 (0)