Skip to content

Commit d465850

Browse files
committed
[X86][ArgPromotion] Do not assume large vectors or aggregates ABI compatible
1 parent 0eac945 commit d465850

File tree

3 files changed

+67
-71
lines changed

3 files changed

+67
-71
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6133,15 +6133,17 @@ bool X86TTIImpl::areTypesABICompatible(const Function *Caller,
61336133
// incompatible.
61346134
const TargetMachine &TM = getTLI()->getTargetMachine();
61356135

6136-
if (TM.getSubtarget<X86Subtarget>(*Caller).useAVX512Regs() ==
6136+
// AVX512 supports the largest vector length, so there is no ABI compatibility issue.
6137+
if (TM.getSubtarget<X86Subtarget>(*Caller).useAVX512Regs() &&
61376138
TM.getSubtarget<X86Subtarget>(*Callee).useAVX512Regs())
61386139
return true;
61396140

6140-
// Consider the arguments compatible if they aren't vectors or aggregates.
6141-
// FIXME: Look at the size of vectors.
6142-
// FIXME: Look at the element types of aggregates to see if there are vectors.
6143-
return llvm::none_of(Types,
6144-
[](Type *T) { return T->isVectorTy() || T->isAggregateType(); });
6141+
// Consider the arguments compatible iff they aren't large vectors or
6142+
// aggregates.
6143+
return llvm::none_of(Types, [this](Type *T) {
6144+
return (isa<FixedVectorType>(T) || T->isAggregateType()) &&
6145+
T->getPrimitiveSizeInBits() > getLoadStoreVecRegBitWidth(0);
6146+
});
61456147
}
61466148

61476149
X86TTIImpl::TTI::MemCmpExpansionOptions

llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -227,12 +227,13 @@ bb:
227227
ret void
228228
}
229229

230-
; This should promote
230+
; This should not promote
231231
define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg, ptr readonly %arg1) #3 {
232232
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
233-
; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR3:[0-9]+]] {
233+
; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR3:[0-9]+]] {
234234
; CHECK-NEXT: bb:
235-
; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
235+
; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
236+
; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
236237
; CHECK-NEXT: ret void
237238
;
238239
bb:
@@ -243,13 +244,12 @@ bb:
243244

244245
define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg) #4 {
245246
; CHECK-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
246-
; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR3]] {
247+
; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR4:[0-9]+]] {
247248
; CHECK-NEXT: bb:
248249
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
249250
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
250251
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
251-
; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
252-
; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
252+
; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr [[TMP2]], ptr [[TMP]])
253253
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
254254
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
255255
; CHECK-NEXT: ret void
@@ -264,12 +264,13 @@ bb:
264264
ret void
265265
}
266266

267-
; This should promote
267+
; This should not promote
268268
define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg, ptr readonly %arg1) #4 {
269269
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
270-
; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR3]] {
270+
; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR4]] {
271271
; CHECK-NEXT: bb:
272-
; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
272+
; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
273+
; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
273274
; CHECK-NEXT: ret void
274275
;
275276
bb:
@@ -285,8 +286,7 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg) #3 {
285286
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
286287
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
287288
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
288-
; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
289-
; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
289+
; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr [[TMP2]], ptr [[TMP]])
290290
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
291291
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
292292
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)