|
| 1 | +// RUN: %clang_cc1 -verify -std=c99 %s |
| 2 | +// RUN: %clang_cc1 -verify -std=c99 -fno-dollars-in-identifiers %s |
| 3 | + |
| 4 | +/* WG14 N717: Clang 17 |
| 5 | + * Extended identifiers |
| 6 | + */ |
| 7 | + |
| 8 | +// Function-like macro that discards its argument; used as a sink for UCNs. |
| 9 | +#define M(arg) |
| 10 | + |
| 11 | +// C99 6.4.3p1 specifies the grammar for UCNs: a \u must be followed by exactly |
| 12 | +// four hex digits and a \U by exactly eight; shorter sequences are diagnosed. |
| 13 | +M(\u1) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} |
| 14 | +M(\u12) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} |
| 15 | +M(\u123) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} |
| 16 | +M(\u1234) // Okay |
| 17 | +M(\u12345)// Okay, two tokens (UCN followed by 5) |
| 18 | + |
| 19 | +M(\U1) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} |
| 20 | +M(\U12) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} |
| 21 | +M(\U123) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} |
| 22 | +M(\U1234) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} \ |
| 23 | + expected-note {{did you mean to use '\u'?}} |
| 24 | +M(\U12345) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} |
| 25 | +M(\U123456) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} |
| 26 | +M(\U1234567) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} |
| 27 | +M(\U12345678) // Okay |
| 28 | +M(\U123456789) // Okay-ish, two tokens (valid-per-spec-but-actually-invalid UCN followed by 9) |
| 29 | + |
| 30 | +// Now test the ones that should work. Note that these work in C17 and earlier |
| 31 | +// but are part of the basic character set in C23 and thus should be diagnosed |
| 32 | +// in that mode. They're valid in a character constant, but not valid in an |
| 33 | +// identifier, except for U+0024, which is allowed if -fdollars-in-identifiers |
| 34 | +// is enabled. |
| 35 | +// FIXME: These three should be handled the same way, and should be accepted |
| 36 | +// when dollar signs are allowed in identifiers, rather than rejected; see |
| 37 | +// GH87106. |
| 38 | +M(\u0024) // expected-error {{character '$' cannot be specified by a universal character name}} |
| 39 | +M(\U00000024) // expected-error {{character '$' cannot be specified by a universal character name}} |
| 40 | +M($) |
| 41 | + |
| 42 | +// These should always be rejected because they're not valid identifier |
| 43 | +// characters. |
| 44 | +// FIXME: The diagnostic could be improved to make it clear this is an issue |
| 45 | +// with forming an identifier rather than a UCN. |
| 46 | +M(\u0040) // expected-error {{character '@' cannot be specified by a universal character name}} |
| 47 | +M(\u0060) // expected-error {{character '`' cannot be specified by a universal character name}} |
| 48 | +M(\U00000040) // expected-error {{character '@' cannot be specified by a universal character name}} |
| 49 | +M(\U00000060) // expected-error {{character '`' cannot be specified by a universal character name}} |
| 50 | + |
| 51 | +// UCNs outside of identifiers are handled in Phase 5 of translation, so we |
| 52 | +// cannot use the macro expansion to test their behavior. |
| 53 | + |
| 54 | +// This is outside of the range of values specified by ISO 10646. |
| 55 | +const char *c1 = "\U00110000"; // expected-error {{invalid universal character}} |
| 56 | +// This is the largest value inside that range, so it is accepted. |
| 57 | +const char *c2 = "\U0010FFFF"; |
| 58 | + |
| 59 | +// These should always be accepted because they're valid in a character |
| 60 | +// constant. |
| 61 | +int c3 = '\u0024'; |
| 62 | +int c4 = '\u0040'; |
| 63 | +int c5 = '\u0060'; |
| 64 | + |
| 65 | +int c6 = '\U00000024'; |
| 66 | +int c7 = '\U00000040'; |
| 67 | +int c8 = '\U00000060'; |
| 68 | + |
| 69 | +// Valid code points on either side of the surrogate range (U+D800-U+DFFF). |
| 70 | +M(\uD799) |
| 71 | +const char *c9 = "\U0000E000"; |
| 72 | + |
| 73 | +// Invalid lone surrogates, which are explicitly excluded by C99 6.4.3p2. |
| 74 | +M(\uD800) // expected-error {{invalid universal character}} |
| 75 | +const char *c10 = "\U0000DFFF"; // expected-error {{invalid universal character}} |
0 commit comments