-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[VectorCombine] Scalarize binop-like intrinsics #138095
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1d031ea
ebfcbe4
4374304
968f061
d559e15
3c3f7e3
fea2417
51dccf3
fbce2ad
c2f403d
50789af
3f305fd
766f33d
befd9ba
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: opt < %s -S -p vector-combine -mtriple=x86_64 -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE2 | ||
; RUN: opt < %s -S -p vector-combine -mtriple=x86_64 -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX2 | ||
|
||
; Both operands of the vector maxnum call are scalars inserted into lane 0 of | ||
; a poison vector. The expected output depends on the check prefix: | ||
;   SSE2: the inserts and the vector call are left unchanged. | ||
;   AVX2: a scalar @llvm.maxnum.f32 call is emitted and its result is inserted | ||
;         into lane 0 of the vector call applied to the poison base vectors. | ||
; NOTE(review): the difference is presumably driven by the target cost model | ||
; selected through -mattr; confirm against the pass implementation. | ||
define <2 x float> @maxnum(float %x, float %y) { | ||
; SSE2-LABEL: define <2 x float> @maxnum( | ||
; SSE2-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0:[0-9]+]] { | ||
; SSE2-NEXT: [[X_INSERT:%.*]] = insertelement <2 x float> poison, float [[X]], i32 0 | ||
; SSE2-NEXT: [[Y_INSERT:%.*]] = insertelement <2 x float> poison, float [[Y]], i32 0 | ||
; SSE2-NEXT: [[V:%.*]] = call <2 x float> @llvm.maxnum.v2f32(<2 x float> [[X_INSERT]], <2 x float> [[Y_INSERT]]) | ||
; SSE2-NEXT: ret <2 x float> [[V]] | ||
; | ||
; AVX2-LABEL: define <2 x float> @maxnum( | ||
; AVX2-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0:[0-9]+]] { | ||
; AVX2-NEXT: [[V_SCALAR:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]]) | ||
; AVX2-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.maxnum.v2f32(<2 x float> poison, <2 x float> poison) | ||
; AVX2-NEXT: [[V:%.*]] = insertelement <2 x float> [[TMP1]], float [[V_SCALAR]], i64 0 | ||
; AVX2-NEXT: ret <2 x float> [[V]] | ||
; | ||
%x.insert = insertelement <2 x float> poison, float %x, i32 0 | ||
%y.insert = insertelement <2 x float> poison, float %y, i32 0 | ||
%v = call <2 x float> @llvm.maxnum(<2 x float> %x.insert, <2 x float> %y.insert) | ||
ret <2 x float> %v | ||
} | ||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: | ||
; CHECK: {{.*}} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: opt < %s -S -p vector-combine | FileCheck %s | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we have any tests where we can get different results based on costs? It might need to be moved into a specific target. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good point, I've added a test that shows a difference in costs between SSE and AVX in 3f305fd. |
||
; Fixed-width case: both umax operands are scalars inserted into lane 0 of a | ||
; poison <4 x i32>. The checks expect a scalar @llvm.umax.i32 on %x and %y, | ||
; with the result inserted into lane 0 of the vector umax of the two poison | ||
; base vectors. | ||
define <4 x i32> @umax_fixed(i32 %x, i32 %y) { | ||
; CHECK-LABEL: define <4 x i32> @umax_fixed( | ||
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { | ||
; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 [[Y]]) | ||
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> poison, <4 x i32> poison) | ||
; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0 | ||
; CHECK-NEXT: ret <4 x i32> [[V]] | ||
; | ||
%x.insert = insertelement <4 x i32> poison, i32 %x, i32 0 | ||
%y.insert = insertelement <4 x i32> poison, i32 %y, i32 0 | ||
%v = call <4 x i32> @llvm.umax(<4 x i32> %x.insert, <4 x i32> %y.insert) | ||
ret <4 x i32> %v | ||
} | ||
|
||
; Scalable-vector counterpart of @umax_fixed: the same lane-0 insert pattern on | ||
; <vscale x 4 x i32>. The checks expect the same shape of output, i.e. a scalar | ||
; @llvm.umax.i32 whose result is inserted into lane 0 of the nxv4i32 umax of | ||
; the poison base vectors. | ||
define <vscale x 4 x i32> @umax_scalable(i32 %x, i32 %y) { | ||
; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable( | ||
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { | ||
; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 [[Y]]) | ||
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison) | ||
; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0 | ||
; CHECK-NEXT: ret <vscale x 4 x i32> [[V]] | ||
; | ||
%x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0 | ||
%y.insert = insertelement <vscale x 4 x i32> poison, i32 %y, i32 0 | ||
%v = call <vscale x 4 x i32> @llvm.umax(<vscale x 4 x i32> %x.insert, <vscale x 4 x i32> %y.insert) | ||
ret <vscale x 4 x i32> %v | ||
} | ||
|
||
; The left operand is the constant vector <1, 2, 3, 4>; the right operand is a | ||
; lane-0 insert of %x. The checks expect the scalar call to use the constant's | ||
; lane-0 element (i32 1) on the left, and the base vector call to keep the full | ||
; constant on the left with poison on the right. | ||
define <4 x i32> @umax_fixed_lhs_const(i32 %x) { | ||
; CHECK-LABEL: define <4 x i32> @umax_fixed_lhs_const( | ||
; CHECK-SAME: i32 [[X:%.*]]) { | ||
; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 1, i32 [[X]]) | ||
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> poison) | ||
; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0 | ||
; CHECK-NEXT: ret <4 x i32> [[V]] | ||
; | ||
%x.insert = insertelement <4 x i32> poison, i32 %x, i32 0 | ||
%v = call <4 x i32> @llvm.umax(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> %x.insert) | ||
ret <4 x i32> %v | ||
} | ||
|
||
; Mirror of @umax_fixed_lhs_const with the constant vector <1, 2, 3, 4> as the | ||
; right operand. The checks expect operand order to be preserved: the scalar | ||
; call is umax(%x, 1) and the base vector call is umax(poison, <1, 2, 3, 4>). | ||
define <4 x i32> @umax_fixed_rhs_const(i32 %x) { | ||
; CHECK-LABEL: define <4 x i32> @umax_fixed_rhs_const( | ||
; CHECK-SAME: i32 [[X:%.*]]) { | ||
; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 1) | ||
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>) | ||
; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0 | ||
; CHECK-NEXT: ret <4 x i32> [[V]] | ||
; | ||
%x.insert = insertelement <4 x i32> poison, i32 %x, i32 0 | ||
%v = call <4 x i32> @llvm.umax(<4 x i32> %x.insert, <4 x i32> <i32 1, i32 2, i32 3, i32 4>) | ||
ret <4 x i32> %v | ||
} | ||
|
||
; Scalable-vector case with a constant splat (i32 42) as the left operand and a | ||
; lane-0 insert of %x on the right. The checks expect the scalar call to be | ||
; umax(42, %x) and the base vector call to be umax(splat (i32 42), poison). | ||
define <vscale x 4 x i32> @umax_scalable_lhs_const(i32 %x) { | ||
; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable_lhs_const( | ||
; CHECK-SAME: i32 [[X:%.*]]) { | ||
; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 42, i32 [[X]]) | ||
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> splat (i32 42), <vscale x 4 x i32> poison) | ||
; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0 | ||
; CHECK-NEXT: ret <vscale x 4 x i32> [[V]] | ||
; | ||
%x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0 | ||
%v = call <vscale x 4 x i32> @llvm.umax(<vscale x 4 x i32> splat (i32 42), <vscale x 4 x i32> %x.insert) | ||
ret <vscale x 4 x i32> %v | ||
} | ||
|
||
; Mirror of @umax_scalable_lhs_const with the splat (i32 42) constant as the | ||
; right operand. The checks expect the scalar call to be umax(%x, 42) and the | ||
; base vector call to be umax(poison, splat (i32 42)). | ||
define <vscale x 4 x i32> @umax_scalable_rhs_const(i32 %x) { | ||
; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable_rhs_const( | ||
; CHECK-SAME: i32 [[X:%.*]]) { | ||
; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 42) | ||
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> splat (i32 42)) | ||
; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0 | ||
; CHECK-NEXT: ret <vscale x 4 x i32> [[V]] | ||
; | ||
%x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0 | ||
%v = call <vscale x 4 x i32> @llvm.umax(<vscale x 4 x i32> %x.insert, <vscale x 4 x i32> splat (i32 42)) | ||
ret <vscale x 4 x i32> %v | ||
} | ||
|
||
; Negative test: shouldn't be scalarized, because the callee is not a | ||
; "trivially vectorizable" intrinsic. The call is a partial-reduce add whose | ||
; operands have different vector types (<4 x i32> and <8 x i32>), and the | ||
; checks expect the inserts and the call to come through unchanged. | ||
define <4 x i32> @non_trivially_vectorizable(i32 %x, i32 %y) { | ||
; CHECK-LABEL: define <4 x i32> @non_trivially_vectorizable( | ||
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { | ||
; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0 | ||
; CHECK-NEXT: [[Y_INSERT:%.*]] = insertelement <8 x i32> poison, i32 [[Y]], i32 0 | ||
; CHECK-NEXT: [[V:%.*]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v8i32(<4 x i32> [[X_INSERT]], <8 x i32> [[Y_INSERT]]) | ||
; CHECK-NEXT: ret <4 x i32> [[V]] | ||
; | ||
%x.insert = insertelement <4 x i32> poison, i32 %x, i32 0 | ||
%y.insert = insertelement <8 x i32> poison, i32 %y, i32 0 | ||
%v = call <4 x i32> @llvm.experimental.vector.partial.reduce.add(<4 x i32> %x.insert, <8 x i32> %y.insert) | ||
ret <4 x i32> %v | ||
} | ||
|
||
; TODO: We should be able to scalarize this if we preserve the scalar argument. | ||
; The powi call takes a vector base (a lane-0 insert of %x) and a scalar i32 | ||
; exponent (42). The checks currently expect the insert and the vector call to | ||
; be left unchanged. | ||
define <4 x float> @scalar_argument(float %x) { | ||
; CHECK-LABEL: define <4 x float> @scalar_argument( | ||
; CHECK-SAME: float [[X:%.*]]) { | ||
; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x float> poison, float [[X]], i32 0 | ||
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[X_INSERT]], i32 42) | ||
; CHECK-NEXT: ret <4 x float> [[V]] | ||
; | ||
%x.insert = insertelement <4 x float> poison, float %x, i32 0 | ||
%v = call <4 x float> @llvm.powi(<4 x float> %x.insert, i32 42) | ||
ret <4 x float> %v | ||
} | ||
|
||
; scmp of a lane-0 insert of %x against an all-zero splat. The result type | ||
; (<4 x i2>) differs from the operand type (<4 x i32>). The checks expect the | ||
; insert and the vector call to be left unchanged (the zero splat is printed as | ||
; zeroinitializer). | ||
; NOTE(review): presumably not scalarized because the return type does not | ||
; match the operand type; confirm against the pass implementation. | ||
define <4 x i2> @scmp(i32 %x) { | ||
; CHECK-LABEL: define <4 x i2> @scmp( | ||
; CHECK-SAME: i32 [[X:%.*]]) { | ||
; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0 | ||
; CHECK-NEXT: [[V:%.*]] = call <4 x i2> @llvm.scmp.v4i2.v4i32(<4 x i32> [[X_INSERT]], <4 x i32> zeroinitializer) | ||
; CHECK-NEXT: ret <4 x i2> [[V]] | ||
; | ||
%x.insert = insertelement <4 x i32> poison, i32 %x, i32 0 | ||
%v = call <4 x i2> @llvm.scmp(<4 x i32> %x.insert, <4 x i32> splat (i32 0)) | ||
ret <4 x i2> %v | ||
} |
Uh oh!
There was an error while loading. Please reload this page.