[ARM] Additional tests and minor formatting. NFC

davemgreen · davemgreen · commit f008b5b8ce72 · 2019-12-09T10:24:33.000Z
This adds some extra cost model tests for shifts, and makes some minor
adjustments to the NEON division cost table code to make it clear what it
applies to. Both changes are NFC.
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -650,50 +650,50 @@ int ARMTTIImpl::getArithmeticInstrCost(
   int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
 
-  const unsigned FunctionCallDivCost = 20;
-  const unsigned ReciprocalDivCost = 10;
-  static const CostTblEntry CostTbl[] = {
-    // Division.
-    // These costs are somewhat random. Choose a cost of 20 to indicate that
-    // vectorizing devision (added function call) is going to be very expensive.
-    // Double registers types.
-    { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
-    { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
-    { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
-    { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
-    { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
-    { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
-    { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
-    { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
-    { ISD::SDIV, MVT::v4i16,     ReciprocalDivCost},
-    { ISD::UDIV, MVT::v4i16,     ReciprocalDivCost},
-    { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
-    { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
-    { ISD::SDIV, MVT::v8i8,      ReciprocalDivCost},
-    { ISD::UDIV, MVT::v8i8,      ReciprocalDivCost},
-    { ISD::SREM, MVT::v8i8,  8 * FunctionCallDivCost},
-    { ISD::UREM, MVT::v8i8,  8 * FunctionCallDivCost},
-    // Quad register types.
-    { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
-    { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
-    { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
-    { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
-    { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
-    { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
-    { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
-    { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
-    { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
-    { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
-    { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
-    { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
-    { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
-    { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
-    { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
-    { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
-    // Multiplication.
-  };
-
   if (ST->hasNEON()) {
+    const unsigned FunctionCallDivCost = 20;
+    const unsigned ReciprocalDivCost = 10;
+    static const CostTblEntry CostTbl[] = {
+      // Division.
+      // These costs are somewhat random. Choose a cost of 20 to indicate that
+      // vectorizing division (added function call) is going to be very expensive.
+      // Double register types.
+      { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
+      { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
+      { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
+      { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
+      { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
+      { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
+      { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
+      { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
+      { ISD::SDIV, MVT::v4i16,     ReciprocalDivCost},
+      { ISD::UDIV, MVT::v4i16,     ReciprocalDivCost},
+      { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
+      { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
+      { ISD::SDIV, MVT::v8i8,      ReciprocalDivCost},
+      { ISD::UDIV, MVT::v8i8,      ReciprocalDivCost},
+      { ISD::SREM, MVT::v8i8,  8 * FunctionCallDivCost},
+      { ISD::UREM, MVT::v8i8,  8 * FunctionCallDivCost},
+      // Quad register types.
+      { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
+      { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
+      { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
+      { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
+      { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
+      { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
+      { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
+      { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
+      { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
+      { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
+      { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
+      { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
+      { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
+      { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
+      { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
+      { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
+      // Multiplication.
+    };
+
     if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
       return LT.first * Entry->Cost;
 
diff --git a/llvm/test/Analysis/CostModel/ARM/freeshift.ll b/llvm/test/Analysis/CostModel/ARM/freeshift.ll
@@ -0,0 +1,96 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi < %s | FileCheck %s
+
+define void @shl(i32 %a, i32 %b) {
+; CHECK-LABEL: 'shl'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ss = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xs = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ns = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %os = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %is = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %as = shl i32 %a, 3
+  %ac = add i32 %b, %as
+  %ss = shl i32 %a, 3
+  %sc = sub i32 %b, %ss
+  %xs = shl i32 %a, 3
+  %xc = xor i32 %b, %xs
+  %ns = shl i32 %a, 3
+  %nc = and i32 %b, %ns
+  %os = shl i32 %a, 3
+  %oc = or i32 %b, %os
+  %is = shl i32 %a, 3
+  %ic = icmp eq i32 %b, %is
+  ret void
+}
+
+define void @ashr(i32 %a, i32 %b) {
+; CHECK-LABEL: 'ashr'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ss = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xs = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ns = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %os = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %is = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %as = ashr i32 %a, 3
+  %ac = add i32 %b, %as
+  %ss = ashr i32 %a, 3
+  %sc = sub i32 %b, %ss
+  %xs = ashr i32 %a, 3
+  %xc = xor i32 %b, %xs
+  %ns = ashr i32 %a, 3
+  %nc = and i32 %b, %ns
+  %os = ashr i32 %a, 3
+  %oc = or i32 %b, %os
+  %is = ashr i32 %a, 3
+  %ic = icmp eq i32 %b, %is
+  ret void
+}
+
+define void @lshr(i32 %a, i32 %b) {
+; CHECK-LABEL: 'lshr'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ss = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xs = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ns = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %os = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %is = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %as = lshr i32 %a, 3
+  %ac = add i32 %b, %as
+  %ss = lshr i32 %a, 3
+  %sc = sub i32 %b, %ss
+  %xs = lshr i32 %a, 3
+  %xc = xor i32 %b, %xs
+  %ns = lshr i32 %a, 3
+  %nc = and i32 %b, %ns
+  %os = lshr i32 %a, 3
+  %oc = or i32 %b, %os
+  %is = lshr i32 %a, 3
+  %ic = icmp eq i32 %b, %is
+  ret void
+}
+
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll
@@ -0,0 +1,90 @@
+; -debug-only output is only available in builds with assertions enabled.
+; REQUIRES: asserts
+; RUN: opt -loop-vectorize -enable-arm-maskedldst < %s -S -o - | FileCheck %s --check-prefix=CHECK
+; RUN: opt -loop-vectorize -enable-arm-maskedldst -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=CHECK-COST
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv8.1m.main-arm-none-eabi"
+
+; Check the costs the loop vectorizer reports for a shl whose only use is an
+; and, at VF 1 and VF 4, and that a vector.body block is emitted.
+; CHECK-LABEL: test
+; CHECK-COST: LV: Found an estimated cost of 1 for VF 1 For instruction:   %and515 = shl i32 %l41, 3
+; CHECK-COST: LV: Found an estimated cost of 1 for VF 1 For instruction:   %l45 = and i32 %and515, 131072
+; CHECK-COST: LV: Found an estimated cost of 2 for VF 4 For instruction:   %and515 = shl i32 %l41, 3
+; CHECK-COST: LV: Found an estimated cost of 2 for VF 4 For instruction:   %l45 = and i32 %and515, 131072
+; CHECK: vector.body
+
+define void @test([101 x i32] *%src, i32 %N) #0 {
+entry:
+  br label %for.body386
+  
+for.body386:                                      ; preds = %entry, %l77
+  %add387 = phi i32 [ %inc532, %l77 ], [ 0, %entry ]
+  %arrayidx388 = getelementptr inbounds [101 x i32], [101 x i32]* %src, i32 0, i32 %add387
+  %l41 = load i32, i32* %arrayidx388, align 4
+  %l42 = and i32 %l41, 65535
+  %l43 = icmp eq i32 %l42, 0
+  br i1 %l43, label %l77, label %l44
+
+l44:                                               ; preds = %for.body386
+  %and515 = shl i32 %l41, 3
+  %l45 = and i32 %and515, 131072
+  %and506 = shl i32 %l41, 5
+  %l46 = and i32 %and506, 262144
+  %and497 = shl i32 %l41, 7
+  %l47 = and i32 %and497, 524288
+  %and488 = shl i32 %l41, 9
+  %l48 = and i32 %and488, 1048576
+  %and479 = shl i32 %l41, 11
+  %l49 = and i32 %and479, 2097152
+  %and470 = shl i32 %l41, 13
+  %l50 = and i32 %and470, 4194304
+  %and461 = shl i32 %l41, 15
+  %l51 = and i32 %and461, 8388608
+  %and452 = shl i32 %l41, 17
+  %l52 = and i32 %and452, 16777216
+  %and443 = shl i32 %l41, 19
+  %l53 = and i32 %and443, 33554432
+  %and434 = shl i32 %l41, 21
+  %l54 = and i32 %and434, 67108864
+  %and425 = shl i32 %l41, 23
+  %l55 = and i32 %and425, 134217728
+  %and416 = shl i32 %l41, 25
+  %l56 = and i32 %and416, 268435456
+  %and407 = shl i32 %l41, 27
+  %l57 = and i32 %and407, 536870912
+  %and398 = shl i32 %l41, 29
+  %l58 = and i32 %and398, 1073741824
+  %l59 = shl i32 %l41, 31
+  %l60 = or i32 %l59, %l41
+  %l61 = or i32 %l58, %l60
+  %l62 = or i32 %l57, %l61
+  %l63 = or i32 %l56, %l62
+  %l64 = or i32 %l55, %l63
+  %l65 = or i32 %l54, %l64
+  %l66 = or i32 %l53, %l65
+  %l67 = or i32 %l52, %l66
+  %l68 = or i32 %l51, %l67
+  %l69 = or i32 %l50, %l68
+  %l70 = or i32 %l49, %l69
+  %l71 = or i32 %l48, %l70
+  %l72 = or i32 %l47, %l71
+  %l73 = or i32 %l46, %l72
+  %l74 = or i32 %l45, %l73
+  %and524 = shl i32 %l41, 1
+  %l75 = and i32 %and524, 65536
+  %l76 = or i32 %l75, %l74
+  store i32 %l76, i32* %arrayidx388, align 4
+  br label %l77
+
+l77:                                               ; preds = %for.body386, %l44
+  %inc532 = add nuw nsw i32 %add387, 1
+  %exitcond649 = icmp eq i32 %inc532, %N
+  br i1 %exitcond649, label %exit, label %for.body386
+
+exit:
+  ret void
+}
+
+attributes #0 = { nounwind "min-legal-vector-width"="0" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+fp-armv8d16sp,+fp16,+fpregs,+fullfp16,+hwdiv,+lob,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp" "use-soft-float"="false" }