Skip to content

Commit 5f1c1f4

Browse files
AaronBallmanGeorgeARM
authored andcommitted
[C] Add diagnostic + attr for unterminated strings (llvm#137829)
This introduces three things related to intialization like: char buf[3] = "foo"; where the array does not declare enough space for the null terminator but otherwise can represent the array contents exactly. 1) An attribute named 'nonstring' which can be used to mark that a field or variable is not intended to hold string data. 2) -Wunterminated-string-initialization, which is grouped under -Wextra, and diagnoses the above construct unless the declaration uses the 'nonstring' attribute. 3) -Wc++-unterminated-string-initialization, which is grouped under -Wc++-compat, and diagnoses the above construct even if the declaration uses the 'nonstring' attribute. Fixes llvm#137705
1 parent 08887b5 commit 5f1c1f4

9 files changed

+142
-10
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,23 @@ C Language Changes
171171
``-Wenum-conversion`` and ``-Wimplicit-int-enum-cast``. This conversion is an
172172
int-to-enum conversion because the enumeration on the right-hand side is
173173
promoted to ``int`` before the assignment.
174+
- Added ``-Wunterminated-string-initialization``, grouped under ``-Wextra``,
175+
which diagnoses an initialization from a string literal where only the null
176+
terminator cannot be stored. e.g.,
177+
178+
.. code-block:: c
179+
180+
181+
char buf1[3] = "foo"; // -Wunterminated-string-initialization
182+
char buf2[3] = "flarp"; // -Wexcess-initializers
183+
184+
This diagnostic can be suppressed by adding the new ``nonstring`` attribute
185+
to the field or variable being initialized. #GH137705
186+
- Added ``-Wc++-unterminated-string-initialization``, grouped under
187+
``-Wc++-compat``, which also diagnoses the same cases as
188+
``-Wunterminated-string-initialization``. However, this diagnostic is not
189+
silenced by the ``nonstring`` attribute as these initializations are always
190+
incompatible with C++.
174191

175192
C2y Feature Support
176193
^^^^^^^^^^^^^^^^^^^

clang/include/clang/Basic/Attr.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5024,3 +5024,9 @@ def OpenACCRoutineDecl :InheritableAttr {
50245024
void printPrettyPragma(raw_ostream &OS, const PrintingPolicy &Policy) const;
50255025
}];
50265026
}
5027+
5028+
def NonString : InheritableAttr {
5029+
let Spellings = [GCC<"nonstring">];
5030+
let Subjects = SubjectList<[Var, Field]>;
5031+
let Documentation = [NonStringDocs];
5032+
}

clang/include/clang/Basic/AttrDocs.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9294,3 +9294,18 @@ Declares that a function potentially allocates heap memory, and prevents any pot
92949294
of ``nonallocating`` by the compiler.
92959295
}];
92969296
}
9297+
9298+
def NonStringDocs : Documentation {
9299+
let Category = DocCatDecl;
9300+
let Content = [{
9301+
The ``nonstring`` attribute can be applied to the declaration of a variable or
9302+
a field whose type is a character array to specify that the character array is
9303+
not intended to behave like a null-terminated string. This will silence
9304+
diagnostics with code like:
9305+
9306+
.. code-block:: c
9307+
9308+
char BadStr[3] = "foo"; // No space for the null terminator, diagnosed
9309+
__attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed
9310+
}];
9311+
}

clang/include/clang/Basic/DiagnosticGroups.td

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,14 +156,19 @@ def BuiltinRequiresHeader : DiagGroup<"builtin-requires-header">;
156156
def C99Compat : DiagGroup<"c99-compat">;
157157
def C23Compat : DiagGroup<"c23-compat">;
158158
def : DiagGroup<"c2x-compat", [C23Compat]>;
159+
def InitStringTooLongMissingNonString :
160+
DiagGroup<"unterminated-string-initialization">;
161+
def InitStringTooLongForCpp :
162+
DiagGroup<"c++-unterminated-string-initialization">;
159163
def HiddenCppDecl : DiagGroup<"c++-hidden-decl">;
160164
def DefaultConstInitUnsafe : DiagGroup<"default-const-init-unsafe">;
161165
def DefaultConstInit : DiagGroup<"default-const-init", [DefaultConstInitUnsafe]>;
162166
def ImplicitVoidPtrCast : DiagGroup<"implicit-void-ptr-cast">;
163167
def ImplicitIntToEnumCast : DiagGroup<"implicit-int-enum-cast",
164168
[ImplicitEnumEnumCast]>;
165169
def CXXCompat: DiagGroup<"c++-compat", [ImplicitVoidPtrCast, DefaultConstInit,
166-
ImplicitIntToEnumCast, HiddenCppDecl]>;
170+
ImplicitIntToEnumCast, HiddenCppDecl,
171+
InitStringTooLongForCpp]>;
167172

168173
def ExternCCompat : DiagGroup<"extern-c-compat">;
169174
def KeywordCompat : DiagGroup<"keyword-compat">;
@@ -1143,6 +1148,7 @@ def Extra : DiagGroup<"extra", [
11431148
StringConcatation,
11441149
FUseLdPath,
11451150
CastFunctionTypeMismatch,
1151+
InitStringTooLongMissingNonString,
11461152
]>;
11471153

11481154
def Most : DiagGroup<"most", [

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3804,6 +3804,9 @@ def err_attribute_weakref_without_alias : Error<
38043804
"weakref declaration of %0 must also have an alias attribute">;
38053805
def err_alias_not_supported_on_darwin : Error <
38063806
"aliases are not supported on darwin">;
3807+
def warn_attribute_non_character_array : Warning<
3808+
"%0%select{ attribute|}1 only applies to fields or variables of character "
3809+
"array type; type is %2">, InGroup<IgnoredAttributes>;
38073810
def warn_attribute_wrong_decl_type_str : Warning<
38083811
"%0%select{ attribute|}1 only applies to %2">, InGroup<IgnoredAttributes>;
38093812
def err_attribute_wrong_decl_type_str : Error<
@@ -6404,6 +6407,15 @@ def err_initializer_string_for_char_array_too_long : Error<
64046407
def ext_initializer_string_for_char_array_too_long : ExtWarn<
64056408
"initializer-string for char array is too long">,
64066409
InGroup<ExcessInitializers>;
6410+
def warn_initializer_string_for_char_array_too_long_no_nonstring : Warning<
6411+
"initializer-string for character array is too long, array size is %0 but "
6412+
"initializer has size %1 (including the null terminating character); did you "
6413+
"mean to use the 'nonstring' attribute?">,
6414+
InGroup<InitStringTooLongMissingNonString>, DefaultIgnore;
6415+
def warn_initializer_string_for_char_array_too_long_for_cpp : Warning<
6416+
"initializer-string for character array is too long for C++, array size is "
6417+
"%0 but initializer has size %1 (including the null terminating character)">,
6418+
InGroup<InitStringTooLongForCpp>, DefaultIgnore;
64076419
def warn_missing_field_initializers : Warning<
64086420
"missing field %0 initializer">,
64096421
InGroup<MissingFieldInitializers>, DefaultIgnore;

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4907,6 +4907,20 @@ void Sema::AddModeAttr(Decl *D, const AttributeCommonInfo &CI,
49074907
D->addAttr(::new (Context) ModeAttr(Context, CI, Name));
49084908
}
49094909

4910+
static void handleNonStringAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
4911+
// This only applies to fields and variable declarations which have an array
4912+
// type.
4913+
QualType QT = cast<ValueDecl>(D)->getType();
4914+
if (!QT->isArrayType() ||
4915+
!QT->getBaseElementTypeUnsafe()->isAnyCharacterType()) {
4916+
S.Diag(D->getBeginLoc(), diag::warn_attribute_non_character_array)
4917+
<< AL << AL.isRegularKeywordAttribute() << QT << AL.getRange();
4918+
return;
4919+
}
4920+
4921+
D->addAttr(::new (S.Context) NonStringAttr(S.Context, AL));
4922+
}
4923+
49104924
static void handleNoDebugAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
49114925
D->addAttr(::new (S.Context) NoDebugAttr(S.Context, AL));
49124926
}
@@ -7144,6 +7158,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
71447158
case ParsedAttr::AT_Mode:
71457159
handleModeAttr(S, D, AL);
71467160
break;
7161+
case ParsedAttr::AT_NonString:
7162+
handleNonStringAttr(S, D, AL);
7163+
break;
71477164
case ParsedAttr::AT_NonNull:
71487165
if (auto *PVD = dyn_cast<ParmVarDecl>(D))
71497166
handleNonNullAttrParameter(S, PVD, AL);

clang/lib/Sema/SemaInit.cpp

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,8 @@ static void CheckC23ConstexprInitStringLiteral(const StringLiteral *SE,
210210
Sema &SemaRef, QualType &TT);
211211

212212
static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
213-
Sema &S, bool CheckC23ConstexprInit = false) {
213+
Sema &S, const InitializedEntity &Entity,
214+
bool CheckC23ConstexprInit = false) {
214215
// Get the length of the string as parsed.
215216
auto *ConstantArrayTy =
216217
cast<ConstantArrayType>(Str->getType()->getAsArrayTypeUnsafe());
@@ -232,6 +233,7 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
232233
}
233234

234235
const ConstantArrayType *CAT = cast<ConstantArrayType>(AT);
236+
uint64_t ArrayLen = CAT->getZExtSize();
235237

236238
// We have an array of character type with known size. However,
237239
// the size may be smaller or larger than the string we are initializing.
@@ -247,16 +249,31 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
247249
}
248250

249251
// [dcl.init.string]p2
250-
if (StrLength > CAT->getZExtSize())
252+
if (StrLength > ArrayLen)
251253
S.Diag(Str->getBeginLoc(),
252254
diag::err_initializer_string_for_char_array_too_long)
253-
<< CAT->getZExtSize() << StrLength << Str->getSourceRange();
255+
<< ArrayLen << StrLength << Str->getSourceRange();
254256
} else {
255257
// C99 6.7.8p14.
256-
if (StrLength - 1 > CAT->getZExtSize())
258+
if (StrLength - 1 > ArrayLen)
257259
S.Diag(Str->getBeginLoc(),
258260
diag::ext_initializer_string_for_char_array_too_long)
259261
<< Str->getSourceRange();
262+
else if (StrLength - 1 == ArrayLen) {
263+
// If the entity being initialized has the nonstring attribute, then
264+
// silence the "missing nonstring" diagnostic.
265+
if (const ValueDecl *D = Entity.getDecl();
266+
!D || !D->hasAttr<NonStringAttr>())
267+
S.Diag(
268+
Str->getBeginLoc(),
269+
diag::warn_initializer_string_for_char_array_too_long_no_nonstring)
270+
<< ArrayLen << StrLength << Str->getSourceRange();
271+
272+
// Always emit the C++ compatibility diagnostic.
273+
S.Diag(Str->getBeginLoc(),
274+
diag::warn_initializer_string_for_char_array_too_long_for_cpp)
275+
<< ArrayLen << StrLength << Str->getSourceRange();
276+
}
260277
}
261278

262279
// Set the type to the actual size that we are initializing. If we have
@@ -1579,7 +1596,7 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity,
15791596
if (IsStringInit(expr, arrayType, SemaRef.Context) == SIF_None) {
15801597
// FIXME: Should we do this checking in verify-only mode?
15811598
if (!VerifyOnly)
1582-
CheckStringInit(expr, ElemType, arrayType, SemaRef,
1599+
CheckStringInit(expr, ElemType, arrayType, SemaRef, Entity,
15831600
SemaRef.getLangOpts().C23 &&
15841601
initializingConstexprVariable(Entity));
15851602
if (StructuredList)
@@ -2089,9 +2106,9 @@ void InitListChecker::CheckArrayType(const InitializedEntity &Entity,
20892106
// constant for each string.
20902107
// FIXME: Should we do these checks in verify-only mode too?
20912108
if (!VerifyOnly)
2092-
CheckStringInit(IList->getInit(Index), DeclType, arrayType, SemaRef,
2093-
SemaRef.getLangOpts().C23 &&
2094-
initializingConstexprVariable(Entity));
2109+
CheckStringInit(
2110+
IList->getInit(Index), DeclType, arrayType, SemaRef, Entity,
2111+
SemaRef.getLangOpts().C23 && initializingConstexprVariable(Entity));
20952112
if (StructuredList) {
20962113
UpdateStructuredListElement(StructuredList, StructuredIndex,
20972114
IList->getInit(Index));
@@ -8405,7 +8422,7 @@ ExprResult InitializationSequence::Perform(Sema &S,
84058422
QualType Ty = Step->Type;
84068423
bool UpdateType = ResultType && Entity.getType()->isIncompleteArrayType();
84078424
CheckStringInit(CurInit.get(), UpdateType ? *ResultType : Ty,
8408-
S.Context.getAsArrayType(Ty), S,
8425+
S.Context.getAsArrayType(Ty), S, Entity,
84098426
S.getLangOpts().C23 &&
84108427
initializingConstexprVariable(Entity));
84118428
break;

clang/test/Misc/pragma-attribute-supported-attributes-list.test

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@
133133
// CHECK-NEXT: NoThreadSafetyAnalysis (SubjectMatchRule_function)
134134
// CHECK-NEXT: NoThrow (SubjectMatchRule_hasType_functionType)
135135
// CHECK-NEXT: NoUwtable (SubjectMatchRule_hasType_functionType)
136+
// CHECK-NEXT: NonString (SubjectMatchRule_variable, SubjectMatchRule_field)
136137
// CHECK-NEXT: NotTailCalled (SubjectMatchRule_function)
137138
// CHECK-NEXT: OMPAssume (SubjectMatchRule_function, SubjectMatchRule_objc_method)
138139
// CHECK-NEXT: OSConsumed (SubjectMatchRule_variable_is_parameter)

clang/test/Sema/attr-nonstring.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// RUN: %clang_cc1 -fsyntax-only -verify=noncxx,expected,no-nonstring -Wunterminated-string-initialization %s
2+
// RUN: %clang_cc1 -fsyntax-only -verify=noncxx,expected,no-nonstring -Wextra %s
3+
// RUN: %clang_cc1 -fsyntax-only -verify=noncxx,expected,compat -Wc++-unterminated-string-initialization %s
4+
// RUN: %clang_cc1 -fsyntax-only -verify=noncxx,expected,compat -Wc++-compat %s
5+
// RUN: %clang_cc1 -fsyntax-only -verify=expected,cxx -x c++ %s
6+
// RUN: %clang_cc1 -fsyntax-only -verify=expected,noncxx -Wno-unterminated-string-initialization %s
7+
// RUN: %clang_cc1 -fsyntax-only -verify=expected,noncxx -Wc++-compat -Wextra -Wno-unterminated-string-initialization -Wno-c++-unterminated-string-initialization %s
8+
9+
char foo[3] = "foo"; // no-nonstring-warning {{initializer-string for character array is too long, array size is 3 but initializer has size 4 (including the null terminating character); did you mean to use the 'nonstring' attribute?}} \
10+
compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \
11+
cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
12+
__attribute__((nonstring)) char bar[3] = "bar"; // compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \
13+
cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
14+
15+
struct S {
16+
char buf[3];
17+
__attribute__((nonstring)) char fub[3];
18+
} s = { "baz", "bop" }; // no-nonstring-warning {{initializer-string for character array is too long, array size is 3 but initializer has size 4 (including the null terminating character); did you mean to use the 'nonstring' attribute?}} \
19+
compat-warning 2 {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \
20+
cxx-error 2 {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
21+
22+
// Show that we also issue the diagnostic for the other character types as well.
23+
signed char scfoo[3] = "foo"; // no-nonstring-warning {{initializer-string for character array is too long, array size is 3 but initializer has size 4 (including the null terminating character); did you mean to use the 'nonstring' attribute?}} \
24+
compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \
25+
cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
26+
unsigned char ucfoo[3] = "foo"; // no-nonstring-warning {{initializer-string for character array is too long, array size is 3 but initializer has size 4 (including the null terminating character); did you mean to use the 'nonstring' attribute?}} \
27+
compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \
28+
cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
29+
30+
// wchar_t (et al) are a bit funny in that it cannot do string init in C++ at
31+
// all, so they are not tested.
32+
33+
// Show that we properly diagnose incorrect uses of nonstring.
34+
__attribute__((nonstring)) void func(void); // expected-warning {{'nonstring' attribute only applies to variables and non-static data members}}
35+
__attribute__((nonstring("test"))) char eek1[2]; // expected-error {{'nonstring' attribute takes no arguments}}
36+
__attribute__((nonstring)) int eek2; // expected-warning {{'nonstring' attribute only applies to fields or variables of character array type; type is 'int'}}
37+
38+
// Note, these diagnostics are separate from the "too many initializers"
39+
// diagnostic when you overwrite more than just the null terminator.
40+
char too_big[3] = "this is too big"; // noncxx-warning {{initializer-string for char array is too long}} \
41+
cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 16 (including the null terminating character)}}

0 commit comments

Comments
 (0)