|
| 1 | +; RUN: %opt < %s %loadEnzyme -enzyme -enzyme-preopt=false -mem2reg -simplifycfg -dce -instcombine -S | FileCheck %s |
| 2 | + |
| 3 | +; Function Attrs: nounwind |
| 4 | +declare void @__enzyme_autodiff.f64(...) |
| 5 | + |
| 6 | +; Function Attrs: nounwind uwtable. Primal function under test (the one handed to @__enzyme_autodiff.f64 below): bitcasts %dst and %src to i8* and copies %num bytes from %src to %dst with llvm.memcpy, align 1 on both pointers. |
| 7 | +define dso_local void @memcpy_float(double* nocapture %dst, double* nocapture readonly %src, i64 %num) #0 { |
| 8 | +entry: |
| 9 | + %0 = bitcast double* %dst to i8* |
| 10 | + %1 = bitcast double* %src to i8* |
| 11 | + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %0, i8* align 1 %1, i64 %num, i1 false) |
| 12 | + ret void |
| 13 | +} |
| 14 | + |
| 15 | +; Function Attrs: argmemonly nounwind |
| 16 | +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1 |
| 17 | + |
| 18 | +; Function Attrs: nounwind uwtable. Test driver: calls @__enzyme_autodiff.f64 on @memcpy_float with "enzyme_width" 3, passing %dst followed by %dstp1..%dstp3, then %src followed by %srcp1..%srcp3, then the length %n. |
| 19 | +define dso_local void @dmemcpy_float(double* %dst, double* %dstp1, double* %dstp2, double* %dstp3, double* %src, double* %srcp1, double* %srcp2, double* %srcp3, i64 %n) local_unnamed_addr #0 { |
| 20 | +entry: |
| 21 | + tail call void (...) @__enzyme_autodiff.f64(void (double*, double*, i64)* nonnull @memcpy_float, metadata !"enzyme_width", i64 3, double* %dst, double* %dstp1, double* %dstp2, double* %dstp3, double* %src, double* %srcp1, double* %srcp2, double* %srcp3, i64 %n) #3 |
| 22 | + ret void |
| 23 | +} |
| 24 | + |
| 25 | +attributes #0 = { nounwind uwtable } |
| 26 | +attributes #1 = { argmemonly nounwind } |
| 27 | +attributes #2 = { noinline nounwind uwtable } ; NOTE(review): #2 is not referenced by anything in this file, while the @__enzyme_autodiff.f64 call site uses #3, which has no definition here -- confirm this is intended |
| 28 | + |
| 29 | + |
| 30 | +; CHECK: define internal void @diffe3memcpy_float(double* nocapture %dst, [3 x double*] %"dst'", double* nocapture readonly %src, [3 x double*] %"src'", i64 %num) |
| 31 | +; CHECK-NEXT: entry: |
| 32 | +; CHECK-NEXT: %0 = extractvalue [3 x double*] %"dst'", 0 |
| 33 | +; CHECK-NEXT: %1 = extractvalue [3 x double*] %"dst'", 1 |
| 34 | +; CHECK-NEXT: %2 = extractvalue [3 x double*] %"dst'", 2 |
| 35 | +; CHECK-NEXT: %3 = bitcast double* %dst to i8* |
| 36 | +; CHECK-NEXT: %4 = extractvalue [3 x double*] %"src'", 0 |
| 37 | +; CHECK-NEXT: %5 = extractvalue [3 x double*] %"src'", 1 |
| 38 | +; CHECK-NEXT: %6 = extractvalue [3 x double*] %"src'", 2 |
| 39 | +; CHECK-NEXT: %7 = bitcast double* %src to i8* |
| 40 | +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %3, i8* align 1 %7, i64 %num, i1 false) |
| 41 | +; CHECK-NEXT: %8 = lshr i64 %num, 3 |
| 42 | +; CHECK-NEXT: %9 = {{(icmp eq i64 %8, 0|icmp ult i64 %num, 8)}} |
| 43 | +; CHECK-NEXT: br i1 %9, label %__enzyme_memcpyadd_doubleda1sa1.exit, label %for.body.i |
| 44 | + |
| 45 | +; CHECK: for.body.i: ; preds = %for.body.i, %entry |
| 46 | +; CHECK-NEXT: %idx.i = phi i64 [ 0, %entry ], [ %idx.next.i, %for.body.i ] |
| 47 | +; CHECK-NEXT: %dst.i.i = getelementptr inbounds double, double* %0, i64 %idx.i |
| 48 | +; CHECK-NEXT: %dst.i.l.i = load double, double* %dst.i.i, align 1 |
| 49 | +; CHECK-NEXT: store double 0.000000e+00, double* %dst.i.i, align 1 |
| 50 | +; CHECK-NEXT: %src.i.i = getelementptr inbounds double, double* %4, i64 %idx.i |
| 51 | +; CHECK-NEXT: %src.i.l.i = load double, double* %src.i.i, align 1 |
| 52 | +; CHECK-NEXT: %10 = fadd fast double %src.i.l.i, %dst.i.l.i |
| 53 | +; CHECK-NEXT: store double %10, double* %src.i.i, align 1 |
| 54 | +; CHECK-NEXT: %idx.next.i = add nuw i64 %idx.i, 1 |
| 55 | +; CHECK-NEXT: %11 = icmp eq i64 %8, %idx.next.i |
| 56 | +; CHECK-NEXT: br i1 %11, label %__enzyme_memcpyadd_doubleda1sa1.exit, label %for.body.i |
| 57 | + |
| 58 | +; CHECK: __enzyme_memcpyadd_doubleda1sa1.exit: ; preds = %entry, %for.body.i |
| 59 | +; CHECK-NEXT: %12 = lshr i64 %num, 3 |
| 60 | +; CHECK-NEXT: %13 = {{(icmp eq i64 %12, 0|icmp ult i64 %num, 8)}} |
| 61 | +; CHECK-NEXT: br i1 %13, label %__enzyme_memcpyadd_doubleda1sa1.exit13, label %for.body.i12 |
| 62 | + |
| 63 | +; CHECK: for.body.i12: ; preds = %for.body.i12, %__enzyme_memcpyadd_doubleda1sa1.exit |
| 64 | +; CHECK-NEXT: %idx.i6 = phi i64 [ 0, %__enzyme_memcpyadd_doubleda1sa1.exit ], [ %idx.next.i11, %for.body.i12 ] |
| 65 | +; CHECK-NEXT: %dst.i.i7 = getelementptr inbounds double, double* %1, i64 %idx.i6 |
| 66 | +; CHECK-NEXT: %dst.i.l.i8 = load double, double* %dst.i.i7, align 1 |
| 67 | +; CHECK-NEXT: store double 0.000000e+00, double* %dst.i.i7, align 1 |
| 68 | +; CHECK-NEXT: %src.i.i9 = getelementptr inbounds double, double* %5, i64 %idx.i6 |
| 69 | +; CHECK-NEXT: %src.i.l.i10 = load double, double* %src.i.i9, align 1 |
| 70 | +; CHECK-NEXT: %14 = fadd fast double %src.i.l.i10, %dst.i.l.i8 |
| 71 | +; CHECK-NEXT: store double %14, double* %src.i.i9, align 1 |
| 72 | +; CHECK-NEXT: %idx.next.i11 = add nuw i64 %idx.i6, 1 |
| 73 | +; CHECK-NEXT: %15 = icmp eq i64 %12, %idx.next.i11 |
| 74 | +; CHECK-NEXT: br i1 %15, label %__enzyme_memcpyadd_doubleda1sa1.exit13, label %for.body.i12 |
| 75 | + |
| 76 | +; CHECK: __enzyme_memcpyadd_doubleda1sa1.exit13: ; preds = %__enzyme_memcpyadd_doubleda1sa1.exit, %for.body.i12 |
| 77 | +; CHECK-NEXT: %16 = lshr i64 %num, 3 |
| 78 | +; CHECK-NEXT: %17 = {{(icmp eq i64 %16, 0|icmp ult i64 %num, 8)}} |
| 79 | +; CHECK-NEXT: br i1 %17, label %__enzyme_memcpyadd_doubleda1sa1.exit21, label %for.body.i20 |
| 80 | + |
| 81 | +; CHECK: for.body.i20: ; preds = %for.body.i20, %__enzyme_memcpyadd_doubleda1sa1.exit13 |
| 82 | +; CHECK-NEXT: %idx.i14 = phi i64 [ 0, %__enzyme_memcpyadd_doubleda1sa1.exit13 ], [ %idx.next.i19, %for.body.i20 ] |
| 83 | +; CHECK-NEXT: %dst.i.i15 = getelementptr inbounds double, double* %2, i64 %idx.i14 |
| 84 | +; CHECK-NEXT: %dst.i.l.i16 = load double, double* %dst.i.i15, align 1 |
| 85 | +; CHECK-NEXT: store double 0.000000e+00, double* %dst.i.i15, align 1 |
| 86 | +; CHECK-NEXT: %src.i.i17 = getelementptr inbounds double, double* %6, i64 %idx.i14 |
| 87 | +; CHECK-NEXT: %src.i.l.i18 = load double, double* %src.i.i17, align 1 |
| 88 | +; CHECK-NEXT: %18 = fadd fast double %src.i.l.i18, %dst.i.l.i16 |
| 89 | +; CHECK-NEXT: store double %18, double* %src.i.i17, align 1 |
| 90 | +; CHECK-NEXT: %idx.next.i19 = add nuw i64 %idx.i14, 1 |
| 91 | +; CHECK-NEXT: %19 = icmp eq i64 %16, %idx.next.i19 |
| 92 | +; CHECK-NEXT: br i1 %19, label %__enzyme_memcpyadd_doubleda1sa1.exit21, label %for.body.i20 |
| 93 | + |
| 94 | +; CHECK: __enzyme_memcpyadd_doubleda1sa1.exit21: ; preds = %__enzyme_memcpyadd_doubleda1sa1.exit13, %for.body.i20 |
| 95 | +; CHECK-NEXT: ret void |
| 96 | +; CHECK-NEXT: } |
0 commit comments