-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[VPlan] Compute induction end values in VPlan. #112145
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 24 commits
e58cb96
31c1c5f
6440a91
c6cc339
c9c38c8
4fd2d0e
be373bf
071392e
b48b097
499bc8a
ec6d6e0
5eb0cbc
18b8e8c
44a0799
3715110
8a1748a
bbdc0bb
89a1b3a
d74aca2
b19f10a
07c7874
8f701a3
378caa2
a763436
d7c28cc
4265a86
e51c6fb
4bd9416
a87d409
7897ab7
f3c6387
6c8def5
1e44bdd
5f92f65
6e860aa
edfc01b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1063,6 +1063,15 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { | |
|
||
if (match(&R, m_Not(m_Not(m_VPValue(A))))) | ||
return R.getVPSingleValue()->replaceAllUsesWith(A); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Some explanation of the patterns folded below? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks! Are these patterns used/tested, given the efforts to avoid introducing redundant DerivedIV recipes? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, those 2 cases are covered by the DerivedIVs introduced in this patch. |
||
// Remove redundant DerivedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0. | ||
if ((match(&R, | ||
m_DerivedIV(m_SpecificInt(0), m_VPValue(A), m_SpecificInt(1))) || | ||
match(&R, | ||
m_DerivedIV(m_SpecificInt(0), m_SpecificInt(0), m_VPValue()))) && | ||
TypeInfo.inferScalarType(R.getOperand(1)) == | ||
TypeInfo.inferScalarType(R.getVPSingleValue())) | ||
return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1)); | ||
} | ||
|
||
/// Move loop-invariant recipes out of the vector loop region in \p Plan. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -74,6 +74,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef | |
; CHECK-VS1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] | ||
; CHECK-VS1-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() | ||
; CHECK-VS1-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 16 | ||
; CHECK-VS1-NEXT: [[TMP40:%.*]] = add i64 [[TMP0]], [[N_VEC]] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Unused instruction? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, unfortunately we now create the end VPValues during VPlan construction, and now, during the legacy epilogue skeleton code generation, there's no convenient way to remove them for the epilogue-only case, I think. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Worth recording a FIXME somewhere to handle when epilog and main are covered by one VPlan? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added TODO, thanks! There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should be gone now, thanks. |
||
; CHECK-VS1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[CONV]], i64 0 | ||
; CHECK-VS1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer | ||
; CHECK-VS1-NEXT: br label %[[VECTOR_BODY:.*]] | ||
|
@@ -105,9 +106,9 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef | |
; CHECK-VS1-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 8 | ||
; CHECK-VS1-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP3]], [[TMP29]] | ||
; CHECK-VS1-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF2]] | ||
; CHECK-VS1-NEXT: [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC3]] | ||
; CHECK-VS1-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() | ||
; CHECK-VS1-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 8 | ||
; CHECK-VS1-NEXT: [[TMP39:%.*]] = add i64 [[TMP0]], [[N_VEC3]] | ||
; CHECK-VS1-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 8 x i8> poison, i8 [[CONV]], i64 0 | ||
; CHECK-VS1-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 8 x i8> [[BROADCAST_SPLATINSERT7]], <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer | ||
; CHECK-VS1-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] | ||
|
@@ -127,7 +128,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef | |
; CHECK-VS1-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]] | ||
; CHECK-VS1-NEXT: br i1 [[CMP_N10]], label %[[WHILE_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] | ||
; CHECK-VS1: [[VEC_EPILOG_SCALAR_PH]]: | ||
; CHECK-VS1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ] | ||
; CHECK-VS1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP39]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ] | ||
; CHECK-VS1-NEXT: br label %[[WHILE_BODY:.*]] | ||
; CHECK-VS1: [[WHILE_BODY]]: | ||
; CHECK-VS1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[WHILE_BODY]] ] | ||
|
@@ -182,6 +183,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef | |
; CHECK-VS2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] | ||
; CHECK-VS2-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() | ||
; CHECK-VS2-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8 | ||
; CHECK-VS2-NEXT: [[TMP40:%.*]] = add i64 [[TMP0]], [[N_VEC]] | ||
; CHECK-VS2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i8> poison, i8 [[CONV]], i64 0 | ||
; CHECK-VS2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer | ||
; CHECK-VS2-NEXT: br label %[[VECTOR_BODY:.*]] | ||
|
@@ -213,9 +215,9 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef | |
; CHECK-VS2-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 4 | ||
; CHECK-VS2-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP3]], [[TMP29]] | ||
; CHECK-VS2-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF2]] | ||
; CHECK-VS2-NEXT: [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC3]] | ||
; CHECK-VS2-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() | ||
; CHECK-VS2-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 4 | ||
; CHECK-VS2-NEXT: [[TMP39:%.*]] = add i64 [[TMP0]], [[N_VEC3]] | ||
; CHECK-VS2-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 4 x i8> poison, i8 [[CONV]], i64 0 | ||
; CHECK-VS2-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 4 x i8> [[BROADCAST_SPLATINSERT7]], <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer | ||
; CHECK-VS2-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] | ||
|
@@ -235,7 +237,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef | |
; CHECK-VS2-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]] | ||
; CHECK-VS2-NEXT: br i1 [[CMP_N10]], label %[[WHILE_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] | ||
; CHECK-VS2: [[VEC_EPILOG_SCALAR_PH]]: | ||
; CHECK-VS2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ] | ||
; CHECK-VS2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP39]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ] | ||
; CHECK-VS2-NEXT: br label %[[WHILE_BODY:.*]] | ||
; CHECK-VS2: [[WHILE_BODY]]: | ||
; CHECK-VS2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[WHILE_BODY]] ] | ||
|
@@ -428,9 +430,9 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef % | |
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP1]], [[TMP4]] | ||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]] | ||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] | ||
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC]] | ||
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() | ||
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 | ||
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC]] | ||
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[TMP1]]) | ||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[CONV]], i64 0 | ||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks!