// RUN: %clang_cc1 -verify -std=c99 %s
// RUN: %clang_cc1 -verify -std=c99 -fno-dollars-in-identifiers %s

/* WG14 N717: Clang 17
 * Extended identifiers
 */

// Used as a sink for UCNs: M expands to nothing, so its argument is discarded
// after it has been lexed.
#define M(arg)

// C99 6.4.3p1 specifies the grammar for UCNs. A \u must be followed by exactly
// four hex digits, and \U must be followed by exactly eight.
M(\u1)    // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
M(\u12)   // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
M(\u123)  // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
M(\u1234) // Okay
M(\u12345)// Okay, two tokens (UCN followed by 5)

M(\U1)         // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
M(\U12)        // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
M(\U123)       // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
M(\U1234)      // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} \
                  expected-note {{did you mean to use '\u'?}}
M(\U12345)     // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
M(\U123456)    // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
M(\U1234567)   // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
M(\U12345678)  // Okay
M(\U123456789) // Okay-ish, two tokens (valid-per-spec-but-actually-invalid UCN followed by 9)

// Now test the ones that should work. Note, these work in C17 and earlier but
// are part of the basic character set in C23 and thus should be diagnosed in
// that mode. They're valid in a character constant, but not valid in an
// identifier, except for U+0024 which is allowed if -fdollars-in-identifiers
// is enabled.
// FIXME: These three should be handled the same way, and should be accepted
// when dollar signs are allowed in identifiers, rather than rejected, see
// GH87106.
M(\u0024) // expected-error {{character '$' cannot be specified by a universal character name}}
M(\U00000024) // expected-error {{character '$' cannot be specified by a universal character name}}
M($)

// These should always be rejected because they're not valid identifier
// characters.
// FIXME: the diagnostic could be improved to make it clear this is an issue
// with forming an identifier rather than a UCN.
M(\u0040) // expected-error {{character '@' cannot be specified by a universal character name}}
M(\u0060) // expected-error {{character '`' cannot be specified by a universal character name}}
M(\U00000040) // expected-error {{character '@' cannot be specified by a universal character name}}
M(\U00000060) // expected-error {{character '`' cannot be specified by a universal character name}}

// UCNs outside of identifiers are handled in Phase 5 of translation, so we
// cannot use the macro expansion to test their behavior.

// This is outside of the range of values specified by ISO 10646.
const char *c1 = "\U00110000"; // expected-error {{invalid universal character}}
// This is within the range, so it is accepted.
const char *c2 = "\U0010FFFF";

// These should always be accepted because they're valid in a character
// constant.
int c3 = '\u0024';
int c4 = '\u0040';
int c5 = '\u0060';

int c6 = '\U00000024';
int c7 = '\U00000040';
int c8 = '\U00000060';

// Valid code points on either side of the surrogate range (U+D800 through
// U+DFFF); these are not surrogates and are accepted.
M(\uD799)
const char *c9 = "\U0000E000";

// Invalid lone surrogates, which are excluded explicitly by 6.4.3p2.
M(\uD800) // expected-error {{invalid universal character}}
const char *c10 = "\U0000DFFF"; // expected-error {{invalid universal character}}