-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[llvm][AArch64] Improve the cost model for i128 div's #107306
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[llvm][AArch64] Improve the cost model for i128 div's #107306
Conversation
They're implemented via libcalls, which I've microbenchmarked to take about 14 and 16 cycles for udiv and sdiv i128 respectively on this Apple M2 Max.
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-analysis Author: Jon Roelofs (jroelofs) ChangesThey're implemented via libcalls, which I've microbenchmarked to take about 14 and 16 cycles for udiv and sdiv i128 respectively on this Apple M2 Max. Patch is 20.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/107306.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 58c267f1ce4bd6..a94bba6150494f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3222,6 +3222,8 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
Op1Info.getNoProps(), Op2Info.getNoProps());
return Cost;
}
+ if (TLI->getValueType(DL, Ty) == MVT::i128)
+ return 16;
[[fallthrough]];
case ISD::UDIV: {
if (Op2Info.isConstant() && Op2Info.isUniform()) {
@@ -3240,6 +3242,10 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
}
}
+ auto VT = TLI->getValueType(DL, Ty);
+ if (VT == MVT::i128)
+ return 14;
+
InstructionCost Cost = BaseT::getArithmeticInstrCost(
Opcode, Ty, CostKind, Op1Info, Op2Info);
if (Ty->isVectorTy()) {
diff --git a/llvm/test/Analysis/CostModel/AArch64/div.ll b/llvm/test/Analysis/CostModel/AArch64/div.ll
index 7b52e2dc18aa52..9c23f5f53f1028 100644
--- a/llvm/test/Analysis/CostModel/AArch64/div.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/div.ll
@@ -5,6 +5,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
define i32 @sdiv() {
; CHECK-LABEL: 'sdiv'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %I128 = sdiv i128 undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i64 = sdiv <2 x i64> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4i64 = sdiv <4 x i64> undef, undef
@@ -23,6 +24,8 @@ define i32 @sdiv() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V64i8 = sdiv <64 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
+ %I128 = sdiv i128 undef, undef
+
%I64 = sdiv i64 undef, undef
%V2i64 = sdiv <2 x i64> undef, undef
%V4i64 = sdiv <4 x i64> undef, undef
@@ -48,6 +51,7 @@ define i32 @sdiv() {
define i32 @udiv() {
; CHECK-LABEL: 'udiv'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %I128 = udiv i128 undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i64 = udiv <2 x i64> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4i64 = udiv <4 x i64> undef, undef
@@ -66,6 +70,8 @@ define i32 @udiv() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V64i8 = udiv <64 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
+ %I128 = udiv i128 undef, undef
+
%I64 = udiv i64 undef, undef
%V2i64 = udiv <2 x i64> undef, undef
%V4i64 = udiv <4 x i64> undef, undef
@@ -91,6 +97,7 @@ define i32 @udiv() {
define i32 @sdiv_const() {
; CHECK-LABEL: 'sdiv_const'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %I128 = sdiv i128 undef, 7
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, 7
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 6, i64 7>
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
@@ -109,6 +116,8 @@ define i32 @sdiv_const() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
+ %I128 = sdiv i128 undef, 7
+
%I64 = sdiv i64 undef, 7
%V2i64 = sdiv <2 x i64> undef, <i64 6, i64 7>
%V4i64 = sdiv <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
@@ -134,6 +143,7 @@ define i32 @sdiv_const() {
define i32 @udiv_const() {
; CHECK-LABEL: 'udiv_const'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %I128 = udiv i128 undef, 7
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, 7
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 6, i64 7>
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
@@ -152,6 +162,9 @@ define i32 @udiv_const() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
+
+ %I128 = udiv i128 undef, 7
+
%I64 = udiv i64 undef, 7
%V2i64 = udiv <2 x i64> undef, <i64 6, i64 7>
%V4i64 = udiv <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
@@ -177,6 +190,7 @@ define i32 @udiv_const() {
define i32 @sdiv_uniformconst() {
; CHECK-LABEL: 'sdiv_uniformconst'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %I128 = sdiv i128 undef, 7
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, 7
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
@@ -195,6 +209,8 @@ define i32 @sdiv_uniformconst() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
+ %I128 = sdiv i128 undef, 7
+
%I64 = sdiv i64 undef, 7
%V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
%V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
@@ -220,6 +236,7 @@ define i32 @sdiv_uniformconst() {
define i32 @udiv_uniformconst() {
; CHECK-LABEL: 'udiv_uniformconst'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %I128 = udiv i128 undef, 7
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, 7
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
@@ -238,6 +255,8 @@ define i32 @udiv_uniformconst() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
+ %I128 = udiv i128 undef, 7
+
%I64 = udiv i64 undef, 7
%V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
%V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
@@ -263,6 +282,7 @@ define i32 @udiv_uniformconst() {
define i32 @sdiv_constpow2() {
; CHECK-LABEL: 'sdiv_constpow2'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = sdiv i64 undef, 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 8, i64 16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
@@ -281,6 +301,8 @@ define i32 @sdiv_constpow2() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
+ %I128 = sdiv i128 undef, 16
+
%I64 = sdiv i64 undef, 16
%V2i64 = sdiv <2 x i64> undef, <i64 8, i64 16>
%V4i64 = sdiv <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
@@ -306,6 +328,7 @@ define i32 @sdiv_constpow2() {
define i32 @udiv_constpow2() {
; CHECK-LABEL: 'udiv_constpow2'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %I128 = udiv i128 undef, 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 8, i64 16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
@@ -324,6 +347,8 @@ define i32 @udiv_constpow2() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
+ %I128 = udiv i128 undef, 16
+
%I64 = udiv i64 undef, 16
%V2i64 = udiv <2 x i64> undef, <i64 8, i64 16>
%V4i64 = udiv <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
@@ -349,6 +374,7 @@ define i32 @udiv_constpow2() {
define i32 @sdiv_uniformconstpow2() {
; CHECK-LABEL: 'sdiv_uniformconstpow2'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = sdiv i64 undef, 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
@@ -367,6 +393,8 @@ define i32 @sdiv_uniformconstpow2() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
+ %I128 = sdiv i128 undef, 16
+
%I64 = sdiv i64 undef, 16
%V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
%V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
@@ -392,6 +420,7 @@ define i32 @sdiv_uniformconstpow2() {
define i32 @udiv_uniformconstpow2() {
; CHECK-LABEL: 'udiv_uniformconstpow2'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %I128 = udiv i128 undef, 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
@@ -410,6 +439,8 @@ define i32 @udiv_uniformconstpow2() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
+ %I128 = udiv i128 undef, 16
+
%I64 = udiv i64 undef, 16
%V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
%V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
@@ -435,6 +466,7 @@ define i32 @udiv_uniformconstpow2() {
define i32 @sdiv_constnegpow2() {
; CHECK-LABEL: 'sdiv_constnegpow2'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %I128 = sdiv i128 undef, -16
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, -16
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -8, i64 -16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
@@ -453,6 +485,8 @@ define i32 @sdiv_constnegpow2() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
+ %I128 = sdiv i128 undef, -16
+
%I64 = sdiv i64 undef, -16
%V2i64 = sdiv <2 x i64> undef, <i64 -8, i64 -16>
%V4i64 = sdiv <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
@@ -478,6 +512,7 @@ define i32 @sdiv_constnegpow2() {
define i32 @udiv_constnegpow2() {
; CHECK-LABEL: 'udiv_constnegpow2'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %I128 = udiv i128 undef, -16
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, -16
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -8, i64 -16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
@@ -496,6 +531,8 @@ define i32 @udiv_constnegpow2() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
+ %I128 = udiv i128 undef, -16
+
%I64 = udiv i64 undef, -16
%V2i64 = udiv <2 x i64> undef, <i64 -8, i64 -16>
%V4i64 = udiv <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
@@ -521,6 +558,7 @@ define i32 @udiv_constnegpow2() {
define i32 @sdiv_uniformconstnegpow2() {
; CHECK-LABEL: 'sdiv_uniformconstnegpow2'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %I128 = sdiv i128 undef, -16
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, -16
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -16, i64 -16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
@@ -539,6 +577,8 @@ define i32 @sdiv_uniformconstnegpow2() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
+ %I128 = sdiv i128 undef, -16
+
%I64 = sdiv i64 undef, -16
%V2i64 = sdiv <2 x i64> undef, <i64 -16, i64 -16>
%V4i64 = sdiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
@@ -564,6 +604,7 @@ define i32 @sdiv_uniformconstnegpow2() {
define i32 @udiv_uniformconstnegpow2() {
; CHECK-LABEL: 'udiv_uniformconstnegpow2'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %I128 = udiv i128 undef, -16
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, -16
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -16, i64 -16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
@@ -582,6 +623,8 @@ define i32 @udiv_uniformconstnegpow2() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
+ %I128 = udiv i128 undef, -16
+
%I64 = udiv i64 undef, -1...
[truncated]
|
review feedback from dtellenbach
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We tend to use 10 as the cost of a libcall. Does what you are doing need to be more precise than that?
It might be worth using size > 64
. For larger sizes the codegen is horrendous, but giving it a higher cost sounds at least more correct! It could be 10 * type legalization cost or something like it.
Otherwise this sounds good to me. Let me know what you think.
I don't need precision for the case I'm trying to avoid: the vectorizer thought this was way too cheap and emitted one in a runtime check. So... the |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, LGTM. Its good to hear that the the call overhead and the call roughly match up in this case.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/162/builds/5624 Here is the relevant piece of the build log for the reference
|
No description provided.