Implement vector mode: aggregates and vectors (rust-lang#453)

tgymnich · web-flow · commit 54459b98208a · 2022-01-29T11:33:43.000+01:00
diff --git a/enzyme/Enzyme/AdjointGenerator.h b/enzyme/Enzyme/AdjointGenerator.h
@@ -1205,13 +1205,18 @@ class AdjointGenerator
       getForwardBuilder(Builder2);
 
       Value *orig_vec = EEI.getVectorOperand();
+      Type *vecTy = gutils->getShadowType(orig_vec->getType());
 
       auto vec_diffe = gutils->isConstantValue(orig_vec)
-                           ? ConstantVector::getNullValue(orig_vec->getType())
+                           ? Constant::getNullValue(vecTy)
                            : diffe(orig_vec, Builder2);
-      auto diffe =
-          Builder2.CreateExtractElement(vec_diffe, EEI.getIndexOperand());
 
+      auto rule = [&](Value *vec_diffe) {
+        return Builder2.CreateExtractElement(
+            vec_diffe, gutils->getNewFromOriginal(EEI.getIndexOperand()));
+      };
+
+      auto diffe = applyChainRule(EEI.getType(), Builder2, rule, vec_diffe);
       setDiffe(&EEI, diffe, Builder2);
       return;
     }
@@ -1260,19 +1265,25 @@ class AdjointGenerator
       Value *orig_inserted = IEI.getOperand(1);
       Value *orig_index = IEI.getOperand(2);
 
+      Type *insertedTy = gutils->getShadowType(orig_inserted->getType());
+      Type *vectorTy = gutils->getShadowType(orig_vector->getType());
+
       Value *diff_inserted = gutils->isConstantValue(orig_inserted)
-                                 ? ConstantFP::get(orig_inserted->getType(), 0)
+                                 ? Constant::getNullValue(insertedTy)
                                  : diffe(orig_inserted, Builder2);
 
-      Value *prediff =
-          gutils->isConstantValue(orig_vector)
-              ? ConstantVector::getNullValue(orig_vector->getType())
-              : diffe(orig_vector, Builder2);
+      Value *prediff = gutils->isConstantValue(orig_vector)
+                           ? Constant::getNullValue(vectorTy)
+                           : diffe(orig_vector, Builder2);
 
-      auto dindex = Builder2.CreateInsertElement(
-          prediff, diff_inserted, gutils->getNewFromOriginal(orig_index));
-      setDiffe(&IEI, dindex, Builder2);
+      auto rule = [&](Value *diff_inserted, Value *prediff) {
+        return Builder2.CreateInsertElement(
+            prediff, diff_inserted, gutils->getNewFromOriginal(orig_index));
+      };
 
+      Value *dindex =
+          applyChainRule(IEI.getType(), Builder2, rule, diff_inserted, prediff);
+      setDiffe(&IEI, dindex, Builder2);
       return;
     }
     case DerivativeMode::ReverseModeGradient:
@@ -1345,14 +1356,19 @@ class AdjointGenerator
               ? ConstantVector::getNullValue(orig_vector2->getType())
               : diffe(orig_vector2, Builder2);
 
+      auto rule = [&](Value *diffe_vector1, Value *diffe_vector2) {
 #if LLVM_VERSION_MAJOR >= 11
-      auto diffe = Builder2.CreateShuffleVector(diffe_vector1, diffe_vector2,
-                                                SVI.getShuffleMaskForBitcode());
+        auto diffe = Builder2.CreateShuffleVector(
+            diffe_vector1, diffe_vector2, SVI.getShuffleMaskForBitcode());
 #else
-      auto diffe = Builder2.CreateShuffleVector(diffe_vector1, diffe_vector2,
-                                                SVI.getOperand(2));
+        auto diffe = Builder2.CreateShuffleVector(diffe_vector1, diffe_vector2,
+                                                  SVI.getOperand(2));
 #endif
+        return diffe;
+      };
 
+      auto diffe = applyChainRule(SVI.getType(), Builder2, rule, diffe_vector1,
+                                  diffe_vector2);
       setDiffe(&SVI, diffe, Builder2);
       return;
     }
@@ -1417,15 +1433,19 @@ class AdjointGenerator
       getForwardBuilder(Builder2);
 
       Value *orig_aggregate = EVI.getAggregateOperand();
+      Type *agg_type = gutils->getShadowType(orig_aggregate->getType());
+
+      Value *diffe_aggregate = gutils->isConstantValue(orig_aggregate)
+                                   ? Constant::getNullValue(agg_type)
+                                   : diffe(orig_aggregate, Builder2);
 
-      Value *diffe_aggregate =
-          gutils->isConstantValue(orig_aggregate)
-              ? ConstantAggregate::getNullValue(orig_aggregate->getType())
-              : diffe(orig_aggregate, Builder2);
-      Value *diffe =
-          Builder2.CreateExtractValue(diffe_aggregate, EVI.getIndices());
+      auto rule = [&](Value *diffe_aggregate) {
+        return Builder2.CreateExtractValue(diffe_aggregate, EVI.getIndices());
+      };
 
-      setDiffe(&EVI, diffe, Builder2);
+      Value *diff =
+          applyChainRule(EVI.getType(), Builder2, rule, diffe_aggregate);
+      setDiffe(&EVI, diff, Builder2);
       return;
     }
     case DerivativeMode::ReverseModeGradient:
@@ -1526,20 +1546,27 @@ class AdjointGenerator
       IRBuilder<> Builder2(&IVI);
       getForwardBuilder(Builder2);
 
-      Value *orig_inserted = IVI.getInsertedValueOperand();
+      Value *orig_val = IVI.getInsertedValueOperand();
       Value *orig_agg = IVI.getAggregateOperand();
 
-      Value *diff_inserted = gutils->isConstantValue(orig_inserted)
-                                 ? ConstantFP::get(orig_inserted->getType(), 0)
-                                 : diffe(orig_inserted, Builder2);
+      Type *val_type = gutils->getShadowType(orig_val->getType());
+      Type *agg_type = gutils->getShadowType(orig_agg->getType());
+
+      Value *diff_val = gutils->isConstantValue(orig_val)
+                            ? Constant::getNullValue(val_type)
+                            : diffe(orig_val, Builder2);
+
+      Value *diff_agg = gutils->isConstantValue(orig_agg)
+                            ? Constant::getNullValue(agg_type)
+                            : diffe(orig_agg, Builder2);
+
+      auto rule = [&](Value *diff_agg, Value *diff_val) {
+        return Builder2.CreateInsertValue(diff_agg, diff_val, IVI.getIndices());
+      };
 
-      Value *prediff =
-          gutils->isConstantValue(orig_agg)
-              ? ConstantAggregate::getNullValue(orig_agg->getType())
-              : diffe(orig_agg, Builder2);
-      auto dindex =
-          Builder2.CreateInsertValue(prediff, diff_inserted, IVI.getIndices());
-      setDiffe(&IVI, dindex, Builder2);
+      Value *diff = applyChainRule(orig_agg->getType(), Builder2, rule,
+                                   diff_agg, diff_val);
+      setDiffe(&IVI, diff, Builder2);
 
       return;
     }
diff --git a/enzyme/test/Enzyme/ForwardModeVector/insertvalue.ll b/enzyme/test/Enzyme/ForwardModeVector/insertvalue.ll
@@ -0,0 +1,48 @@
+; RUN: %opt < %s %loadEnzyme -enzyme -enzyme-preopt=false -mem2reg -instsimplify -simplifycfg -S | FileCheck %s
+
+%struct.Gradients = type { double, double, double }
+
+; Variadic Enzyme forward-mode entry point; see the call in @test_derivative for the argument layout.
+declare %struct.Gradients @__enzyme_fwddiff(double (double)*, ...)
+
+; Function under test: returns element 1 of %agg2, i.e. %x * %x.
+define double @tester(double %x) {
+entry:
+  %agg1 = insertvalue [3 x double] undef, double %x, 0
+  %mul = fmul double %x, %x
+  %agg2 = insertvalue [3 x double] %agg1, double %mul, 1
+  %add = fadd double %mul, 2.0
+  %agg3 = insertvalue [3 x double] %agg2, double %add, 2
+  %res = extractvalue [3 x double] %agg2, 1
+  ret double %res
+}
+
+define %struct.Gradients @test_derivative(double %x) {
+entry:
+  %0 = tail call %struct.Gradients (double (double)*, ...) @__enzyme_fwddiff(double (double)* nonnull @tester, metadata !"enzyme_width", i64 3, double %x, double 1.0, double 2.0, double 3.0)
+  ret %struct.Gradients %0
+}
+
+
+; CHECK: define internal [3 x double] @fwddiffe3tester(double %x, [3 x double] %"x'")
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %0 = extractvalue [3 x double] %"x'", 0
+; CHECK-NEXT:   %1 = extractvalue [3 x double] %"x'", 0
+; CHECK-NEXT:   %2 = fmul fast double %0, %x
+; CHECK-NEXT:   %3 = fmul fast double %1, %x
+; CHECK-NEXT:   %4 = fadd fast double %2, %3
+; CHECK-NEXT:   %5 = extractvalue [3 x double] %"x'", 1
+; CHECK-NEXT:   %6 = extractvalue [3 x double] %"x'", 1
+; CHECK-NEXT:   %7 = fmul fast double %5, %x
+; CHECK-NEXT:   %8 = fmul fast double %6, %x
+; CHECK-NEXT:   %9 = fadd fast double %7, %8
+; CHECK-NEXT:   %10 = extractvalue [3 x double] %"x'", 2
+; CHECK-NEXT:   %11 = extractvalue [3 x double] %"x'", 2
+; CHECK-NEXT:   %12 = fmul fast double %10, %x
+; CHECK-NEXT:   %13 = fmul fast double %11, %x
+; CHECK-NEXT:   %14 = fadd fast double %12, %13
+; CHECK-NEXT:   %15 = insertvalue [3 x double] undef, double %4, 0
+; CHECK-NEXT:   %16 = insertvalue [3 x double] %15, double %9, 1
+; CHECK-NEXT:   %17 = insertvalue [3 x double] %16, double %14, 2
+; CHECK-NEXT:   ret [3 x double] %17
+; CHECK-NEXT: }
diff --git a/enzyme/test/Enzyme/ForwardModeVector/vecsquare.ll b/enzyme/test/Enzyme/ForwardModeVector/vecsquare.ll
@@ -0,0 +1,112 @@
+; RUN: %opt < %s %loadEnzyme -enzyme -enzyme-preopt=false -mem2reg -S | FileCheck %s
+
+%struct.Gradients = type { {float, float, float}, {float, float, float} }
+
+declare %struct.Gradients @__enzyme_fwddiff({float, float, float} (<4 x float>)*, ...)
+
+define {float, float, float} @square(<4 x float> %x) {
+entry:
+  %vec = insertelement <4 x float> %x, float 1.0, i32 3
+  %sq = fmul <4 x float> %x, %x
+  %cb = fmul <4 x float> %sq, %x          
+  %id = shufflevector <4 x float> %sq, <4 x float> %cb, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  %res1 = extractelement <4 x float> %id, i32 1
+  %res2 = extractelement <4 x float> %id, i32 2
+  %res3 = extractelement <4 x float> %id, i32 3
+  %agg1 = insertvalue {float, float, float} undef, float %res1, 0
+  %agg2 = insertvalue {float, float, float} %agg1, float %res2, 1
+  %agg3 = insertvalue {float, float, float} %agg2, float %res3, 2
+  ret {float, float, float} %agg3
+}
+
+define %struct.Gradients @dsquare(<4 x float> %x) {
+entry:
+  %call = tail call %struct.Gradients ({float, float, float} (<4 x float>)*, ...) @__enzyme_fwddiff({float, float, float} (<4 x float>)* @square, metadata !"enzyme_width", i64 2, <4 x float> %x, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
+  ret %struct.Gradients %call
+}
+
+
+; CHECK: define internal [2 x { float, float, float }] @fwddiffe2square(<4 x float> %x, [2 x <4 x float>] %"x'")
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %sq = fmul <4 x float> %x, %x
+; CHECK-NEXT:   %0 = extractvalue [2 x <4 x float>] %"x'", 0
+; CHECK-NEXT:   %1 = extractvalue [2 x <4 x float>] %"x'", 0
+; CHECK-NEXT:   %2 = fmul fast <4 x float> %0, %x
+; CHECK-NEXT:   %3 = fmul fast <4 x float> %1, %x
+; CHECK-NEXT:   %4 = fadd fast <4 x float> %2, %3
+; CHECK-NEXT:   %5 = insertvalue [2 x <4 x float>] undef, <4 x float> %4, 0
+; CHECK-NEXT:   %6 = extractvalue [2 x <4 x float>] %"x'", 1
+; CHECK-NEXT:   %7 = extractvalue [2 x <4 x float>] %"x'", 1
+; CHECK-NEXT:   %8 = fmul fast <4 x float> %6, %x
+; CHECK-NEXT:   %9 = fmul fast <4 x float> %7, %x
+; CHECK-NEXT:   %10 = fadd fast <4 x float> %8, %9
+; CHECK-NEXT:   %11 = insertvalue [2 x <4 x float>] %5, <4 x float> %10, 1
+; CHECK-NEXT:   %cb = fmul <4 x float> %sq, %x
+; CHECK-NEXT:   %12 = extractvalue [2 x <4 x float>] %11, 0
+; CHECK-NEXT:   %13 = extractvalue [2 x <4 x float>] %"x'", 0
+; CHECK-NEXT:   %14 = fmul fast <4 x float> %12, %x
+; CHECK-NEXT:   %15 = fmul fast <4 x float> %13, %sq
+; CHECK-NEXT:   %16 = fadd fast <4 x float> %14, %15
+; CHECK-NEXT:   %17 = insertvalue [2 x <4 x float>] undef, <4 x float> %16, 0
+; CHECK-NEXT:   %18 = extractvalue [2 x <4 x float>] %11, 1
+; CHECK-NEXT:   %19 = extractvalue [2 x <4 x float>] %"x'", 1
+; CHECK-NEXT:   %20 = fmul fast <4 x float> %18, %x
+; CHECK-NEXT:   %21 = fmul fast <4 x float> %19, %sq
+; CHECK-NEXT:   %22 = fadd fast <4 x float> %20, %21
+; CHECK-NEXT:   %23 = insertvalue [2 x <4 x float>] %17, <4 x float> %22, 1
+; CHECK-NEXT:   %id = shufflevector <4 x float> %sq, <4 x float> %cb, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT:   %24 = extractvalue [2 x <4 x float>] %11, 0
+; CHECK-NEXT:   %25 = extractvalue [2 x <4 x float>] %23, 0
+; CHECK-NEXT:   %26 = shufflevector <4 x float> %24, <4 x float> %25, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT:   %27 = insertvalue [2 x <4 x float>] undef, <4 x float> %26, 0
+; CHECK-NEXT:   %28 = extractvalue [2 x <4 x float>] %11, 1
+; CHECK-NEXT:   %29 = extractvalue [2 x <4 x float>] %23, 1
+; CHECK-NEXT:   %30 = shufflevector <4 x float> %28, <4 x float> %29, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT:   %31 = insertvalue [2 x <4 x float>] %27, <4 x float> %30, 1
+; CHECK-NEXT:   %res1 = extractelement <4 x float> %id, i32 1
+; CHECK-NEXT:   %32 = extractvalue [2 x <4 x float>] %31, 0
+; CHECK-NEXT:   %33 = extractelement <4 x float> %32, i32 1
+; CHECK-NEXT:   %34 = insertvalue [2 x float] undef, float %33, 0
+; CHECK-NEXT:   %35 = extractvalue [2 x <4 x float>] %31, 1
+; CHECK-NEXT:   %36 = extractelement <4 x float> %35, i32 1
+; CHECK-NEXT:   %37 = insertvalue [2 x float] %34, float %36, 1
+; CHECK-NEXT:   %res2 = extractelement <4 x float> %id, i32 2
+; CHECK-NEXT:   %38 = extractvalue [2 x <4 x float>] %31, 0
+; CHECK-NEXT:   %39 = extractelement <4 x float> %38, i32 2
+; CHECK-NEXT:   %40 = insertvalue [2 x float] undef, float %39, 0
+; CHECK-NEXT:   %41 = extractvalue [2 x <4 x float>] %31, 1
+; CHECK-NEXT:   %42 = extractelement <4 x float> %41, i32 2
+; CHECK-NEXT:   %43 = insertvalue [2 x float] %40, float %42, 1
+; CHECK-NEXT:   %res3 = extractelement <4 x float> %id, i32 3
+; CHECK-NEXT:   %44 = extractvalue [2 x <4 x float>] %31, 0
+; CHECK-NEXT:   %45 = extractelement <4 x float> %44, i32 3
+; CHECK-NEXT:   %46 = insertvalue [2 x float] undef, float %45, 0
+; CHECK-NEXT:   %47 = extractvalue [2 x <4 x float>] %31, 1
+; CHECK-NEXT:   %48 = extractelement <4 x float> %47, i32 3
+; CHECK-NEXT:   %49 = insertvalue [2 x float] %46, float %48, 1
+; CHECK-NEXT:   %agg1 = insertvalue { float, float, float } undef, float %res1, 0
+; CHECK-NEXT:   %50 = extractvalue [2 x float] %37, 0
+; CHECK-NEXT:   %51 = insertvalue { float, float, float } zeroinitializer, float %50, 0
+; CHECK-NEXT:   %52 = insertvalue [2 x { float, float, float }] undef, { float, float, float } %51, 0
+; CHECK-NEXT:   %53 = extractvalue [2 x float] %37, 1
+; CHECK-NEXT:   %54 = insertvalue { float, float, float } zeroinitializer, float %53, 0
+; CHECK-NEXT:   %55 = insertvalue [2 x { float, float, float }] %52, { float, float, float } %54, 1
+; CHECK-NEXT:   %agg2 = insertvalue { float, float, float } %agg1, float %res2, 1
+; CHECK-NEXT:   %56 = extractvalue [2 x { float, float, float }] %55, 0
+; CHECK-NEXT:   %57 = extractvalue [2 x float] %43, 0
+; CHECK-NEXT:   %58 = insertvalue { float, float, float } %56, float %57, 1
+; CHECK-NEXT:   %59 = insertvalue [2 x { float, float, float }] undef, { float, float, float } %58, 0
+; CHECK-NEXT:   %60 = extractvalue [2 x { float, float, float }] %55, 1
+; CHECK-NEXT:   %61 = extractvalue [2 x float] %43, 1
+; CHECK-NEXT:   %62 = insertvalue { float, float, float } %60, float %61, 1
+; CHECK-NEXT:   %63 = insertvalue [2 x { float, float, float }] %59, { float, float, float } %62, 1
+; CHECK-NEXT:   %64 = extractvalue [2 x { float, float, float }] %63, 0
+; CHECK-NEXT:   %65 = extractvalue [2 x float] %49, 0
+; CHECK-NEXT:   %66 = insertvalue { float, float, float } %64, float %65, 2
+; CHECK-NEXT:   %67 = insertvalue [2 x { float, float, float }] undef, { float, float, float } %66, 0
+; CHECK-NEXT:   %68 = extractvalue [2 x { float, float, float }] %63, 1
+; CHECK-NEXT:   %69 = extractvalue [2 x float] %49, 1
+; CHECK-NEXT:   %70 = insertvalue { float, float, float } %68, float %69, 2
+; CHECK-NEXT:   %71 = insertvalue [2 x { float, float, float }] %67, { float, float, float } %70, 1
+; CHECK-NEXT:   ret [2 x { float, float, float }] %71
+; CHECK-NEXT: }