Skip to content

Commit b34a4af

Browse files
committed
[Inliner] Propagate more attributes to params when inlining
Add support for propagating: - `dereferenceable` - `dereferenceable_or_null` - `align` - `nonnull` - `nofree` These are only propagated if the parameter to the to-be-inlined callsite matches the exact parameter used in the to-be-inlined function.
1 parent c304152 commit b34a4af

File tree

6 files changed

+75
-26
lines changed

6 files changed

+75
-26
lines changed

clang/test/CodeGen/attr-counted-by-pr88931.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ void init(void * __attribute__((pass_dynamic_object_size(0))));
1313
// CHECK-LABEL: define dso_local void @_ZN3foo3barC1Ev(
1414
// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 {
1515
// CHECK-NEXT: entry:
16-
// CHECK-NEXT: tail call void @_Z4initPvU25pass_dynamic_object_size0(ptr noundef nonnull [[THIS]], i64 noundef -1) #[[ATTR2:[0-9]+]]
16+
// CHECK-NEXT: tail call void @_Z4initPvU25pass_dynamic_object_size0(ptr noundef nonnull align 4 dereferenceable(1) [[THIS]], i64 noundef -1) #[[ATTR2:[0-9]+]]
1717
// CHECK-NEXT: ret void
1818
//
1919
foo::bar::bar() {

clang/test/OpenMP/bug57757.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ void foo() {
3939
// CHECK-NEXT: ]
4040
// CHECK: .untied.jmp..i:
4141
// CHECK-NEXT: store i32 1, ptr [[TMP2]], align 4, !tbaa [[TBAA16]], !alias.scope [[META13]], !noalias [[META17]]
42-
// CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]), !noalias [[META13]]
42+
// CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr nonnull [[TMP1]]), !noalias [[META13]]
4343
// CHECK-NEXT: br label [[DOTOMP_OUTLINED__EXIT]]
4444
// CHECK: .untied.next..i:
4545
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40

llvm/lib/Transforms/Utils/InlineFunction.cpp

+62-13
Original file line numberDiff line numberDiff line change
@@ -1352,20 +1352,41 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
13521352
auto &Context = CalledFunction->getContext();
13531353

13541354
// Collect valid attributes for all params.
1355-
SmallVector<AttrBuilder> ValidParamAttrs;
1355+
SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs;
13561356
bool HasAttrToPropagate = false;
13571357

13581358
for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) {
1359-
ValidParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
1359+
ValidObjParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
1360+
ValidExactParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
13601361
// Access attributes can be propagated to any param with the same underlying
13611362
// object as the argument.
13621363
if (CB.paramHasAttr(I, Attribute::ReadNone))
1363-
ValidParamAttrs.back().addAttribute(Attribute::ReadNone);
1364+
ValidObjParamAttrs.back().addAttribute(Attribute::ReadNone);
13641365
if (CB.paramHasAttr(I, Attribute::ReadOnly))
1365-
ValidParamAttrs.back().addAttribute(Attribute::ReadOnly);
1366+
ValidObjParamAttrs.back().addAttribute(Attribute::ReadOnly);
13661367
if (CB.paramHasAttr(I, Attribute::WriteOnly))
1367-
ValidParamAttrs.back().addAttribute(Attribute::WriteOnly);
1368-
HasAttrToPropagate |= ValidParamAttrs.back().hasAttributes();
1368+
ValidObjParamAttrs.back().addAttribute(Attribute::WriteOnly);
1369+
1370+
// Attributes we can only propagate if the exact parameter is forwarded.
1371+
1372+
// We can propagate both poison generating and UB generating attributes
1373+
// without any extra checks. The only attribute that is tricky to propagate
1374+
// is `noundef` (skipped for now) as that can create new UB where previous
1375+
// behavior was just using a poison value.
1376+
if (auto DerefBytes = CB.getParamDereferenceableBytes(I))
1377+
ValidExactParamAttrs.back().addDereferenceableAttr(DerefBytes);
1378+
if (auto DerefOrNullBytes = CB.getParamDereferenceableOrNullBytes(I))
1379+
ValidExactParamAttrs.back().addDereferenceableOrNullAttr(
1380+
DerefOrNullBytes);
1381+
if (CB.paramHasAttr(I, Attribute::NoFree))
1382+
ValidExactParamAttrs.back().addAttribute(Attribute::NoFree);
1383+
if (CB.paramHasAttr(I, Attribute::NonNull))
1384+
ValidExactParamAttrs.back().addAttribute(Attribute::NonNull);
1385+
if (auto Align = CB.getParamAlign(I))
1386+
ValidExactParamAttrs.back().addAlignmentAttr(Align);
1387+
1388+
HasAttrToPropagate |= ValidObjParamAttrs.back().hasAttributes();
1389+
HasAttrToPropagate |= ValidExactParamAttrs.back().hasAttributes();
13691390
}
13701391

13711392
// Won't be able to propagate anything.
@@ -1383,15 +1404,43 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
13831404
AttributeList AL = NewInnerCB->getAttributes();
13841405
for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) {
13851406
// Check if the underlying value for the parameter is an argument.
1386-
const Value *UnderlyingV =
1387-
getUnderlyingObject(InnerCB->getArgOperand(I));
1388-
const Argument *Arg = dyn_cast<Argument>(UnderlyingV);
1389-
if (!Arg)
1390-
continue;
1407+
const Argument *Arg = dyn_cast<Argument>(InnerCB->getArgOperand(I));
1408+
unsigned ArgNo;
1409+
if (Arg) {
1410+
ArgNo = Arg->getArgNo();
1411+
// For dereferenceable, dereferenceable_or_null, align, etc...
1412+
// we don't want to propagate if the existing param has the same
1413+
// attribute with "better" constraints. So, only remove from the
1414+
// existing AL if the region of the existing param is smaller than
1415+
// what we can propagate. AttributeList's merge API honours the
1416+
// already existing attribute value so we choose the "better"
1417+
// attribute by removing if the existing one is worse.
1418+
if (AL.getParamDereferenceableBytes(I) <
1419+
ValidExactParamAttrs[ArgNo].getDereferenceableBytes())
1420+
AL =
1421+
AL.removeParamAttribute(Context, I, Attribute::Dereferenceable);
1422+
if (AL.getParamDereferenceableOrNullBytes(I) <
1423+
ValidExactParamAttrs[ArgNo].getDereferenceableOrNullBytes())
1424+
AL =
1425+
AL.removeParamAttribute(Context, I, Attribute::Dereferenceable);
1426+
if (AL.getParamAlignment(I).valueOrOne() <
1427+
ValidExactParamAttrs[ArgNo].getAlignment().valueOrOne())
1428+
AL = AL.removeParamAttribute(Context, I, Attribute::Alignment);
1429+
1430+
AL = AL.addParamAttributes(Context, I, ValidExactParamAttrs[ArgNo]);
1431+
1432+
} else {
1433+
// Check if the underlying value for the parameter is an argument.
1434+
const Value *UnderlyingV =
1435+
getUnderlyingObject(InnerCB->getArgOperand(I));
1436+
Arg = dyn_cast<Argument>(UnderlyingV);
1437+
if (!Arg)
1438+
continue;
1439+
ArgNo = Arg->getArgNo();
1440+
}
13911441

1392-
unsigned ArgNo = Arg->getArgNo();
13931442
// If so, propagate its access attributes.
1394-
AL = AL.addParamAttributes(Context, I, ValidParamAttrs[ArgNo]);
1443+
AL = AL.addParamAttributes(Context, I, ValidObjParamAttrs[ArgNo]);
13951444
// We can have conflicting attributes from the inner callsite and
13961445
// to-be-inlined callsite. In that case, choose the most
13971446
// restrictive.

llvm/test/Transforms/Inline/access-attributes-prop.ll

+8-8
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ define void @prop_param_callbase_def_1x_partial_3(ptr %p, ptr %p2) {
294294
define void @prop_deref(ptr %p) {
295295
; CHECK-LABEL: define {{[^@]+}}@prop_deref
296296
; CHECK-SAME: (ptr [[P:%.*]]) {
297-
; CHECK-NEXT: call void @bar1(ptr [[P]])
297+
; CHECK-NEXT: call void @bar1(ptr dereferenceable(16) [[P]])
298298
; CHECK-NEXT: ret void
299299
;
300300
call void @foo1(ptr dereferenceable(16) %p)
@@ -304,7 +304,7 @@ define void @prop_deref(ptr %p) {
304304
define void @prop_deref_or_null(ptr %p) {
305305
; CHECK-LABEL: define {{[^@]+}}@prop_deref_or_null
306306
; CHECK-SAME: (ptr [[P:%.*]]) {
307-
; CHECK-NEXT: call void @bar1(ptr [[P]])
307+
; CHECK-NEXT: call void @bar1(ptr dereferenceable_or_null(256) [[P]])
308308
; CHECK-NEXT: ret void
309309
;
310310
call void @foo1(ptr dereferenceable_or_null(256) %p)
@@ -314,7 +314,7 @@ define void @prop_deref_or_null(ptr %p) {
314314
define void @prop_param_nonnull_and_align(ptr %p) {
315315
; CHECK-LABEL: define {{[^@]+}}@prop_param_nonnull_and_align
316316
; CHECK-SAME: (ptr [[P:%.*]]) {
317-
; CHECK-NEXT: call void @bar1(ptr [[P]])
317+
; CHECK-NEXT: call void @bar1(ptr nonnull align 32 [[P]])
318318
; CHECK-NEXT: ret void
319319
;
320320
call void @foo1(ptr nonnull align 32 %p)
@@ -324,7 +324,7 @@ define void @prop_param_nonnull_and_align(ptr %p) {
324324
define void @prop_param_nofree_and_align(ptr %p) {
325325
; CHECK-LABEL: define {{[^@]+}}@prop_param_nofree_and_align
326326
; CHECK-SAME: (ptr [[P:%.*]]) {
327-
; CHECK-NEXT: call void @bar1(ptr [[P]])
327+
; CHECK-NEXT: call void @bar1(ptr nofree align 32 [[P]])
328328
; CHECK-NEXT: ret void
329329
;
330330
call void @foo1(ptr nofree align 32 %p)
@@ -334,7 +334,7 @@ define void @prop_param_nofree_and_align(ptr %p) {
334334
define void @prop_param_deref_align_no_update(ptr %p) {
335335
; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_align_no_update
336336
; CHECK-SAME: (ptr [[P:%.*]]) {
337-
; CHECK-NEXT: call void @bar1(ptr align 64 dereferenceable(512) [[P]])
337+
; CHECK-NEXT: call void @bar1(ptr align 4 dereferenceable(64) [[P]])
338338
; CHECK-NEXT: ret void
339339
;
340340
call void @foo1_bar_aligned64_deref512(ptr align 4 dereferenceable(64) %p)
@@ -344,7 +344,7 @@ define void @prop_param_deref_align_no_update(ptr %p) {
344344
define void @prop_param_deref_align_update(ptr %p) {
345345
; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_align_update
346346
; CHECK-SAME: (ptr [[P:%.*]]) {
347-
; CHECK-NEXT: call void @bar1(ptr align 64 dereferenceable(512) [[P]])
347+
; CHECK-NEXT: call void @bar1(ptr align 128 dereferenceable(1024) [[P]])
348348
; CHECK-NEXT: ret void
349349
;
350350
call void @foo1_bar_aligned64_deref512(ptr align 128 dereferenceable(1024) %p)
@@ -354,7 +354,7 @@ define void @prop_param_deref_align_update(ptr %p) {
354354
define void @prop_param_deref_or_null_update(ptr %p) {
355355
; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_or_null_update
356356
; CHECK-SAME: (ptr [[P:%.*]]) {
357-
; CHECK-NEXT: call void @bar1(ptr align 512 dereferenceable_or_null(512) [[P]])
357+
; CHECK-NEXT: call void @bar1(ptr align 512 dereferenceable_or_null(1024) [[P]])
358358
; CHECK-NEXT: ret void
359359
;
360360
call void @foo1_bar_aligned512_deref_or_null512(ptr dereferenceable_or_null(1024) %p)
@@ -364,7 +364,7 @@ define void @prop_param_deref_or_null_update(ptr %p) {
364364
define void @prop_param_deref_or_null_no_update(ptr %p) {
365365
; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_or_null_no_update
366366
; CHECK-SAME: (ptr [[P:%.*]]) {
367-
; CHECK-NEXT: call void @bar1(ptr align 512 dereferenceable_or_null(512) [[P]])
367+
; CHECK-NEXT: call void @bar1(ptr align 512 dereferenceable_or_null(32) [[P]])
368368
; CHECK-NEXT: ret void
369369
;
370370
call void @foo1_bar_aligned512_deref_or_null512(ptr dereferenceable_or_null(32) %p)

llvm/test/Transforms/Inline/assumptions-from-callsite-attrs.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ declare void @h(ptr %p, ptr %q, ptr %z)
88
define void @f(ptr %p, ptr %q, ptr %z) {
99
; CHECK-LABEL: define void @f
1010
; CHECK-SAME: (ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[Z:%.*]]) {
11-
; CHECK-NEXT: call void @h(ptr [[P]], ptr [[Q]], ptr [[Z]])
11+
; CHECK-NEXT: call void @h(ptr nonnull [[P]], ptr [[Q]], ptr nonnull [[Z]])
1212
; CHECK-NEXT: ret void
1313
;
1414
call void @g(ptr nonnull %p, ptr %q, ptr nonnull %z)

llvm/test/Transforms/Inline/byval.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ define void @test3() nounwind {
106106
; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS]], align 1
107107
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[S1]])
108108
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[S1]], ptr align 1 [[S]], i64 12, i1 false)
109-
; CHECK-NEXT: call void @g3(ptr [[S1]]) #[[ATTR0]]
109+
; CHECK-NEXT: call void @g3(ptr align 64 [[S1]]) #[[ATTR0]]
110110
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[S1]])
111111
; CHECK-NEXT: ret void
112112
;
@@ -131,7 +131,7 @@ define i32 @test4() nounwind {
131131
; CHECK-SAME: ) #[[ATTR0]] {
132132
; CHECK-NEXT: entry:
133133
; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 64
134-
; CHECK-NEXT: call void @g3(ptr [[S]]) #[[ATTR0]]
134+
; CHECK-NEXT: call void @g3(ptr align 64 [[S]]) #[[ATTR0]]
135135
; CHECK-NEXT: ret i32 4
136136
;
137137
entry:

0 commit comments

Comments
 (0)