Skip to content

Commit f008b5b

Browse files
committed
[ARM] Additional tests and minor formatting. NFC
This adds some extra cost model tests for shifts, and makes some minor adjustments to the Neon cost-table code to make it clearer what it applies to. Both NFC.
1 parent 6d5c273 commit f008b5b

File tree

3 files changed

+225
-43
lines changed

3 files changed

+225
-43
lines changed

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

+43-43
Original file line numberDiff line numberDiff line change
@@ -650,50 +650,50 @@ int ARMTTIImpl::getArithmeticInstrCost(
650650
int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
651651
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
652652

653-
const unsigned FunctionCallDivCost = 20;
654-
const unsigned ReciprocalDivCost = 10;
655-
static const CostTblEntry CostTbl[] = {
656-
// Division.
657-
// These costs are somewhat random. Choose a cost of 20 to indicate that
658-
// vectorizing devision (added function call) is going to be very expensive.
659-
// Double registers types.
660-
{ ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
661-
{ ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
662-
{ ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
663-
{ ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
664-
{ ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
665-
{ ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
666-
{ ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
667-
{ ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
668-
{ ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
669-
{ ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
670-
{ ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
671-
{ ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
672-
{ ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
673-
{ ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
674-
{ ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
675-
{ ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
676-
// Quad register types.
677-
{ ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
678-
{ ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
679-
{ ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
680-
{ ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
681-
{ ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
682-
{ ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
683-
{ ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
684-
{ ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
685-
{ ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
686-
{ ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
687-
{ ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
688-
{ ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
689-
{ ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
690-
{ ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
691-
{ ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
692-
{ ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
693-
// Multiplication.
694-
};
695-
696653
if (ST->hasNEON()) {
654+
const unsigned FunctionCallDivCost = 20;
655+
const unsigned ReciprocalDivCost = 10;
656+
static const CostTblEntry CostTbl[] = {
657+
// Division.
658+
// These costs are somewhat random. Choose a cost of 20 to indicate that
659+
// vectorizing division (added function call) is going to be very expensive.
660+
// Double register types.
661+
{ ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
662+
{ ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
663+
{ ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
664+
{ ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
665+
{ ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
666+
{ ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
667+
{ ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
668+
{ ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
669+
{ ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
670+
{ ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
671+
{ ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
672+
{ ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
673+
{ ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
674+
{ ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
675+
{ ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
676+
{ ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
677+
// Quad register types.
678+
{ ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
679+
{ ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
680+
{ ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
681+
{ ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
682+
{ ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
683+
{ ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
684+
{ ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
685+
{ ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
686+
{ ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
687+
{ ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
688+
{ ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
689+
{ ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
690+
{ ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
691+
{ ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
692+
{ ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
693+
{ ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
694+
// Multiplication.
695+
};
696+
697697
if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
698698
return LT.first * Entry->Cost;
699699

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2+
; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi < %s | FileCheck %s
3+
4+
define void @shl(i32 %a, i32 %b) {
5+
; CHECK-LABEL: 'shl'
6+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = shl i32 %a, 3
7+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as
8+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ss = shl i32 %a, 3
9+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss
10+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xs = shl i32 %a, 3
11+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs
12+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ns = shl i32 %a, 3
13+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns
14+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %os = shl i32 %a, 3
15+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os
16+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %is = shl i32 %a, 3
17+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is
18+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
19+
;
20+
%as = shl i32 %a, 3
21+
%ac = add i32 %b, %as
22+
%ss = shl i32 %a, 3
23+
%sc = sub i32 %b, %ss
24+
%xs = shl i32 %a, 3
25+
%xc = xor i32 %b, %xs
26+
%ns = shl i32 %a, 3
27+
%nc = and i32 %b, %ns
28+
%os = shl i32 %a, 3
29+
%oc = or i32 %b, %os
30+
%is = shl i32 %a, 3
31+
%ic = icmp eq i32 %b, %is
32+
ret void
33+
}
34+
35+
define void @ashr(i32 %a, i32 %b) {
36+
; CHECK-LABEL: 'ashr'
37+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = ashr i32 %a, 3
38+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as
39+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ss = ashr i32 %a, 3
40+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss
41+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xs = ashr i32 %a, 3
42+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs
43+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ns = ashr i32 %a, 3
44+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns
45+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %os = ashr i32 %a, 3
46+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os
47+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %is = ashr i32 %a, 3
48+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is
49+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
50+
;
51+
%as = ashr i32 %a, 3
52+
%ac = add i32 %b, %as
53+
%ss = ashr i32 %a, 3
54+
%sc = sub i32 %b, %ss
55+
%xs = ashr i32 %a, 3
56+
%xc = xor i32 %b, %xs
57+
%ns = ashr i32 %a, 3
58+
%nc = and i32 %b, %ns
59+
%os = ashr i32 %a, 3
60+
%oc = or i32 %b, %os
61+
%is = ashr i32 %a, 3
62+
%ic = icmp eq i32 %b, %is
63+
ret void
64+
}
65+
66+
define void @lshr(i32 %a, i32 %b) {
67+
; CHECK-LABEL: 'lshr'
68+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = lshr i32 %a, 3
69+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as
70+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ss = lshr i32 %a, 3
71+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss
72+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xs = lshr i32 %a, 3
73+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs
74+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ns = lshr i32 %a, 3
75+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns
76+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %os = lshr i32 %a, 3
77+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os
78+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %is = lshr i32 %a, 3
79+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is
80+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
81+
;
82+
%as = lshr i32 %a, 3
83+
%ac = add i32 %b, %as
84+
%ss = lshr i32 %a, 3
85+
%sc = sub i32 %b, %ss
86+
%xs = lshr i32 %a, 3
87+
%xc = xor i32 %b, %xs
88+
%ns = lshr i32 %a, 3
89+
%nc = and i32 %b, %ns
90+
%os = lshr i32 %a, 3
91+
%oc = or i32 %b, %os
92+
%is = lshr i32 %a, 3
93+
%ic = icmp eq i32 %b, %is
94+
ret void
95+
}
96+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
; RUN: opt -loop-vectorize -enable-arm-maskedldst < %s -S -o - | FileCheck %s --check-prefix=CHECK
2+
; RUN: opt -loop-vectorize -enable-arm-maskedldst -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=CHECK-COST
3+
4+
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
5+
target triple = "thumbv8.1m.main-arm-none-eabi"
6+
7+
; CHECK-LABEL: test
8+
; CHECK-COST: LV: Found an estimated cost of 1 for VF 1 For instruction: %and515 = shl i32 %l41, 3
9+
; CHECK-COST: LV: Found an estimated cost of 1 for VF 1 For instruction: %l45 = and i32 %and515, 131072
10+
; CHECK-COST: LV: Found an estimated cost of 2 for VF 4 For instruction: %and515 = shl i32 %l41, 3
11+
; CHECK-COST: LV: Found an estimated cost of 2 for VF 4 For instruction: %l45 = and i32 %and515, 131072
12+
; CHECK: vector.body
13+
14+
define void @test([101 x i32] *%src, i32 %N) #0 {
15+
entry:
16+
br label %for.body386
17+
18+
for.body386: ; preds = %entry, %l77
19+
%add387 = phi i32 [ %inc532, %l77 ], [ 0, %entry ]
20+
%arrayidx388 = getelementptr inbounds [101 x i32], [101 x i32]* %src, i32 0, i32 %add387
21+
%l41 = load i32, i32* %arrayidx388, align 4
22+
%l42 = and i32 %l41, 65535
23+
%l43 = icmp eq i32 %l42, 0
24+
br i1 %l43, label %l77, label %l44
25+
26+
l44: ; preds = %for.body386
27+
%and515 = shl i32 %l41, 3
28+
%l45 = and i32 %and515, 131072
29+
%and506 = shl i32 %l41, 5
30+
%l46 = and i32 %and506, 262144
31+
%and497 = shl i32 %l41, 7
32+
%l47 = and i32 %and497, 524288
33+
%and488 = shl i32 %l41, 9
34+
%l48 = and i32 %and488, 1048576
35+
%and479 = shl i32 %l41, 11
36+
%l49 = and i32 %and479, 2097152
37+
%and470 = shl i32 %l41, 13
38+
%l50 = and i32 %and470, 4194304
39+
%and461 = shl i32 %l41, 15
40+
%l51 = and i32 %and461, 8388608
41+
%and452 = shl i32 %l41, 17
42+
%l52 = and i32 %and452, 16777216
43+
%and443 = shl i32 %l41, 19
44+
%l53 = and i32 %and443, 33554432
45+
%and434 = shl i32 %l41, 21
46+
%l54 = and i32 %and434, 67108864
47+
%and425 = shl i32 %l41, 23
48+
%l55 = and i32 %and425, 134217728
49+
%and416 = shl i32 %l41, 25
50+
%l56 = and i32 %and416, 268435456
51+
%and407 = shl i32 %l41, 27
52+
%l57 = and i32 %and407, 536870912
53+
%and398 = shl i32 %l41, 29
54+
%l58 = and i32 %and398, 1073741824
55+
%l59 = shl i32 %l41, 31
56+
%l60 = or i32 %l59, %l41
57+
%l61 = or i32 %l58, %l60
58+
%l62 = or i32 %l57, %l61
59+
%l63 = or i32 %l56, %l62
60+
%l64 = or i32 %l55, %l63
61+
%l65 = or i32 %l54, %l64
62+
%l66 = or i32 %l53, %l65
63+
%l67 = or i32 %l52, %l66
64+
%l68 = or i32 %l51, %l67
65+
%l69 = or i32 %l50, %l68
66+
%l70 = or i32 %l49, %l69
67+
%l71 = or i32 %l48, %l70
68+
%l72 = or i32 %l47, %l71
69+
%l73 = or i32 %l46, %l72
70+
%l74 = or i32 %l45, %l73
71+
%and524 = shl i32 %l41, 1
72+
%l75 = and i32 %and524, 65536
73+
%l76 = or i32 %l75, %l74
74+
store i32 %l76, i32* %arrayidx388, align 4
75+
br label %l77
76+
77+
l77: ; preds = %for.body386, %l44
78+
%inc532 = add nuw nsw i32 %add387, 1
79+
%exitcond649 = icmp eq i32 %inc532, %N
80+
br i1 %exitcond649, label %exit, label %for.body386
81+
82+
exit:
83+
ret void
84+
}
85+
86+
attributes #0 = { nounwind "min-legal-vector-width"="0" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+fp-armv8d16sp,+fp16,+fpregs,+fullfp16,+hwdiv,+lob,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp" "use-soft-float"="false" }

0 commit comments

Comments
 (0)