Skip to content

Commit db6f627

Browse files
authored
[clang][SME] Ignore flatten/clang::always_inline statements for callees with mismatched streaming attributes (llvm#116391)
If `__attribute__((flatten))` is used on a function, or `[[clang::always_inline]]` on a statement, don't inline any callees with incompatible streaming attributes. Without this check, clang may produce incorrect code when these attributes are used in code with streaming functions. Note: The docs for flatten say it can be ignored when inlining is impossible: "causes calls within the attributed function to be inlined unless it is impossible to do so". Similarly, the (clang-only) `[[clang::always_inline]]` statement attribute is more relaxed than the GNU `__attribute__((always_inline))` (which says it should error if it can't inline), saying only "If a statement is marked [[clang::always_inline]] and contains calls, the compiler attempts to inline those calls." The docs also go on to show an example of where `[[clang::always_inline]]` has no effect.
1 parent 624e52b commit db6f627

File tree

4 files changed

+172
-18
lines changed

4 files changed

+172
-18
lines changed

clang/lib/CodeGen/CGCall.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5111,9 +5111,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
51115111

51125112
// Some architectures (such as x86-64) have the ABI changed based on
51135113
// attribute-target/features. Give them a chance to diagnose.
5114-
CGM.getTargetCodeGenInfo().checkFunctionCallABI(
5115-
CGM, Loc, dyn_cast_or_null<FunctionDecl>(CurCodeDecl),
5116-
dyn_cast_or_null<FunctionDecl>(TargetDecl), CallArgs, RetTy);
5114+
const FunctionDecl *CallerDecl = dyn_cast_or_null<FunctionDecl>(CurCodeDecl);
5115+
const FunctionDecl *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl);
5116+
CGM.getTargetCodeGenInfo().checkFunctionCallABI(CGM, Loc, CallerDecl,
5117+
CalleeDecl, CallArgs, RetTy);
51175118

51185119
// 1. Set up the arguments.
51195120

@@ -5688,7 +5689,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
56885689
Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoInline);
56895690

56905691
// Add call-site always_inline attribute if exists.
5691-
if (InAlwaysInlineAttributedStmt)
5692+
// Note: This corresponds to the [[clang::always_inline]] statement attribute.
5693+
if (InAlwaysInlineAttributedStmt &&
5694+
!CGM.getTargetCodeGenInfo().wouldInliningViolateFunctionCallABI(
5695+
CallerDecl, CalleeDecl))
56925696
Attrs =
56935697
Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline);
56945698

@@ -5704,7 +5708,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
57045708
// FIXME: should this really take priority over __try, below?
57055709
if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() &&
57065710
!InNoInlineAttributedStmt &&
5707-
!(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>())) {
5711+
!(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>()) &&
5712+
!CGM.getTargetCodeGenInfo().wouldInliningViolateFunctionCallABI(
5713+
CallerDecl, CalleeDecl)) {
57085714
Attrs =
57095715
Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline);
57105716
}

clang/lib/CodeGen/TargetInfo.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,24 @@ class TargetCodeGenInfo {
9898
const CallArgList &Args,
9999
QualType ReturnType) const {}
100100

101+
/// Returns true if inlining the function call would produce incorrect code
102+
/// for the current target and should be ignored (even with the always_inline
103+
/// or flatten attributes).
104+
///
105+
/// Note: This probably should be handled in LLVM. However, the LLVM
106+
/// `alwaysinline` attribute currently means the inliner will ignore
107+
/// mismatched attributes (which sometimes can generate invalid code). So,
108+
/// this hook allows targets to avoid adding the LLVM `alwaysinline` attribute
109+
/// based on C/C++ attributes or other target-specific reasons.
110+
///
111+
/// See previous discussion here:
112+
/// https://discourse.llvm.org/t/rfc-avoid-inlining-alwaysinline-functions-when-they-cannot-be-inlined/79528
113+
virtual bool
114+
wouldInliningViolateFunctionCallABI(const FunctionDecl *Caller,
115+
const FunctionDecl *Callee) const {
116+
return false;
117+
}
118+
101119
/// Determines the size of struct _Unwind_Exception on this platform,
102120
/// in 8-bit units. The Itanium ABI defines this as:
103121
/// struct _Unwind_Exception {

clang/lib/CodeGen/Targets/AArch64.cpp

Lines changed: 59 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,9 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
177177
const FunctionDecl *Callee, const CallArgList &Args,
178178
QualType ReturnType) const override;
179179

180+
bool wouldInliningViolateFunctionCallABI(
181+
const FunctionDecl *Caller, const FunctionDecl *Callee) const override;
182+
180183
private:
181184
// Diagnose calls between functions with incompatible Streaming SVE
182185
// attributes.
@@ -1143,30 +1146,67 @@ void AArch64TargetCodeGenInfo::checkFunctionABI(
11431146
}
11441147
}
11451148

1146-
void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
1147-
CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
1148-
const FunctionDecl *Callee) const {
1149-
if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>())
1150-
return;
1149+
enum class ArmSMEInlinability : uint8_t {
1150+
Ok = 0,
1151+
ErrorCalleeRequiresNewZA = 1 << 0,
1152+
WarnIncompatibleStreamingModes = 1 << 1,
1153+
ErrorIncompatibleStreamingModes = 1 << 2,
1154+
1155+
IncompatibleStreamingModes =
1156+
WarnIncompatibleStreamingModes | ErrorIncompatibleStreamingModes,
11511157

1158+
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/ErrorIncompatibleStreamingModes),
1159+
};
1160+
1161+
/// Determines if there are any Arm SME ABI issues with inlining \p Callee into
1162+
/// \p Caller. Returns the issue (if any) in the ArmSMEInlinability bit enum.
1163+
static ArmSMEInlinability GetArmSMEInlinability(const FunctionDecl *Caller,
1164+
const FunctionDecl *Callee) {
11521165
bool CallerIsStreaming =
11531166
IsArmStreamingFunction(Caller, /*IncludeLocallyStreaming=*/true);
11541167
bool CalleeIsStreaming =
11551168
IsArmStreamingFunction(Callee, /*IncludeLocallyStreaming=*/true);
11561169
bool CallerIsStreamingCompatible = isStreamingCompatible(Caller);
11571170
bool CalleeIsStreamingCompatible = isStreamingCompatible(Callee);
11581171

1172+
ArmSMEInlinability Inlinability = ArmSMEInlinability::Ok;
1173+
11591174
if (!CalleeIsStreamingCompatible &&
1160-
(CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible))
1161-
CGM.getDiags().Report(
1162-
CallLoc, CalleeIsStreaming
1163-
? diag::err_function_always_inline_attribute_mismatch
1164-
: diag::warn_function_always_inline_attribute_mismatch)
1165-
<< Caller->getDeclName() << Callee->getDeclName() << "streaming";
1175+
(CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible)) {
1176+
if (CalleeIsStreaming)
1177+
Inlinability |= ArmSMEInlinability::ErrorIncompatibleStreamingModes;
1178+
else
1179+
Inlinability |= ArmSMEInlinability::WarnIncompatibleStreamingModes;
1180+
}
11661181
if (auto *NewAttr = Callee->getAttr<ArmNewAttr>())
11671182
if (NewAttr->isNewZA())
1168-
CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
1169-
<< Callee->getDeclName();
1183+
Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZA;
1184+
1185+
return Inlinability;
1186+
}
1187+
1188+
void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
1189+
CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
1190+
const FunctionDecl *Callee) const {
1191+
if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>())
1192+
return;
1193+
1194+
ArmSMEInlinability Inlinability = GetArmSMEInlinability(Caller, Callee);
1195+
1196+
if ((Inlinability & ArmSMEInlinability::IncompatibleStreamingModes) !=
1197+
ArmSMEInlinability::Ok)
1198+
CGM.getDiags().Report(
1199+
CallLoc,
1200+
(Inlinability & ArmSMEInlinability::ErrorIncompatibleStreamingModes) ==
1201+
ArmSMEInlinability::ErrorIncompatibleStreamingModes
1202+
? diag::err_function_always_inline_attribute_mismatch
1203+
: diag::warn_function_always_inline_attribute_mismatch)
1204+
<< Caller->getDeclName() << Callee->getDeclName() << "streaming";
1205+
1206+
if ((Inlinability & ArmSMEInlinability::ErrorCalleeRequiresNewZA) ==
1207+
ArmSMEInlinability::ErrorCalleeRequiresNewZA)
1208+
CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
1209+
<< Callee->getDeclName();
11701210
}
11711211

11721212
// If the target does not have floating-point registers, but we are using a
@@ -1200,6 +1240,12 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
12001240
checkFunctionCallABISoftFloat(CGM, CallLoc, Caller, Callee, Args, ReturnType);
12011241
}
12021242

1243+
bool AArch64TargetCodeGenInfo::wouldInliningViolateFunctionCallABI(
1244+
const FunctionDecl *Caller, const FunctionDecl *Callee) const {
1245+
return Caller && Callee &&
1246+
GetArmSMEInlinability(Caller, Callee) != ArmSMEInlinability::Ok;
1247+
}
1248+
12031249
void AArch64ABIInfo::appendAttributeMangling(TargetClonesAttr *Attr,
12041250
unsigned Index,
12051251
raw_ostream &Out) const {
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme %s -DUSE_FLATTEN -o - | FileCheck %s
2+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme %s -DUSE_ALWAYS_INLINE_STMT -o - | FileCheck %s
3+
4+
// REQUIRES: aarch64-registered-target
5+
6+
extern void was_inlined(void);
7+
8+
#if defined(USE_FLATTEN)
9+
#define FN_ATTR __attribute__((flatten))
10+
#define STMT_ATTR
11+
#elif defined(USE_ALWAYS_INLINE_STMT)
12+
#define FN_ATTR
13+
#define STMT_ATTR [[clang::always_inline]]
14+
#else
15+
#error Expected USE_FLATTEN or USE_ALWAYS_INLINE_STMT to be defined.
16+
#endif
17+
18+
void fn(void) { was_inlined(); }
19+
void fn_streaming_compatible(void) __arm_streaming_compatible { was_inlined(); }
20+
void fn_streaming(void) __arm_streaming { was_inlined(); }
21+
__arm_locally_streaming void fn_locally_streaming(void) { was_inlined(); }
22+
__arm_new("za") void fn_streaming_new_za(void) __arm_streaming { was_inlined(); }
23+
24+
FN_ATTR
25+
void caller(void) {
26+
STMT_ATTR fn();
27+
STMT_ATTR fn_streaming_compatible();
28+
STMT_ATTR fn_streaming();
29+
STMT_ATTR fn_locally_streaming();
30+
STMT_ATTR fn_streaming_new_za();
31+
}
32+
// CHECK-LABEL: void @caller()
33+
// CHECK-NEXT: entry:
34+
// CHECK-NEXT: call void @was_inlined
35+
// CHECK-NEXT: call void @was_inlined
36+
// CHECK-NEXT: call void @fn_streaming
37+
// CHECK-NEXT: call void @fn_locally_streaming
38+
// CHECK-NEXT: call void @fn_streaming_new_za
39+
40+
FN_ATTR void caller_streaming_compatible(void) __arm_streaming_compatible {
41+
STMT_ATTR fn();
42+
STMT_ATTR fn_streaming_compatible();
43+
STMT_ATTR fn_streaming();
44+
STMT_ATTR fn_locally_streaming();
45+
STMT_ATTR fn_streaming_new_za();
46+
}
47+
// CHECK-LABEL: void @caller_streaming_compatible()
48+
// CHECK-NEXT: entry:
49+
// CHECK-NEXT: call void @fn
50+
// CHECK-NEXT: call void @was_inlined
51+
// CHECK-NEXT: call void @fn_streaming
52+
// CHECK-NEXT: call void @fn_locally_streaming
53+
// CHECK-NEXT: call void @fn_streaming_new_za
54+
55+
FN_ATTR void caller_streaming(void) __arm_streaming {
56+
STMT_ATTR fn();
57+
STMT_ATTR fn_streaming_compatible();
58+
STMT_ATTR fn_streaming();
59+
STMT_ATTR fn_locally_streaming();
60+
STMT_ATTR fn_streaming_new_za();
61+
}
62+
// CHECK-LABEL: void @caller_streaming()
63+
// CHECK-NEXT: entry:
64+
// CHECK-NEXT: call void @fn
65+
// CHECK-NEXT: call void @was_inlined
66+
// CHECK-NEXT: call void @was_inlined
67+
// CHECK-NEXT: call void @was_inlined
68+
// CHECK-NEXT: call void @fn_streaming_new_za
69+
70+
FN_ATTR __arm_locally_streaming
71+
void caller_locally_streaming(void) {
72+
STMT_ATTR fn();
73+
STMT_ATTR fn_streaming_compatible();
74+
STMT_ATTR fn_streaming();
75+
STMT_ATTR fn_locally_streaming();
76+
STMT_ATTR fn_streaming_new_za();
77+
}
78+
// CHECK-LABEL: void @caller_locally_streaming()
79+
// CHECK-NEXT: entry:
80+
// CHECK-NEXT: call void @fn
81+
// CHECK-NEXT: call void @was_inlined
82+
// CHECK-NEXT: call void @was_inlined
83+
// CHECK-NEXT: call void @was_inlined
84+
// CHECK-NEXT: call void @fn_streaming_new_za

0 commit comments

Comments
 (0)