1*2606c877SAaron Ballman // RUN: %clang_cc1 -verify -std=c99 %s 2*2606c877SAaron Ballman // RUN: %clang_cc1 -verify -std=c99 -fno-dollars-in-identifiers %s 3*2606c877SAaron Ballman 4*2606c877SAaron Ballman /* WG14 N717: Clang 17 5*2606c877SAaron Ballman * Extended identifiers 6*2606c877SAaron Ballman */ 7*2606c877SAaron Ballman 8*2606c877SAaron Ballman // Used as a sink for UCNs. 9*2606c877SAaron Ballman #define M(arg) 10*2606c877SAaron Ballman 11*2606c877SAaron Ballman // C99 6.4.3p1 specifies the grammar for UCNs. A \u must be followed by exactly 12*2606c877SAaron Ballman // four hex digits, and \U must be followed by exactly eight. 13*2606c877SAaron Ballman M(\u1) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} 14*2606c877SAaron Ballman M(\u12) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} 15*2606c877SAaron Ballman M(\u123) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} 16*2606c877SAaron Ballman M(\u1234) // Okay 17*2606c877SAaron Ballman M(\u12345)// Okay, two tokens (UCN followed by 5) 18*2606c877SAaron Ballman 19*2606c877SAaron Ballman M(\U1) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} 20*2606c877SAaron Ballman M(\U12) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} 21*2606c877SAaron Ballman M(\U123) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} 22*2606c877SAaron Ballman M(\U1234) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} \ 23*2606c877SAaron Ballman expected-note {{did you mean to use '\u'?}} 24*2606c877SAaron Ballman M(\U12345) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} 25*2606c877SAaron Ballman M(\U123456) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} 26*2606c877SAaron Ballman M(\U1234567) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} 27*2606c877SAaron Ballman M(\U12345678) // Okay 28*2606c877SAaron Ballman M(\U123456789) // Okay-ish, two tokens (valid-per-spec-but-actually-invalid UCN followed by 9) 29*2606c877SAaron Ballman 30*2606c877SAaron Ballman // Now test the ones that should work. Note, these work in C17 and earlier but 31*2606c877SAaron Ballman // are part of the basic character set in C23 and thus should be diagnosed in 32*2606c877SAaron Ballman // that mode. They're valid in a character constant, but not valid in an 33*2606c877SAaron Ballman // identifier, except for U+0024 which is allowed if -fdollars-in-identifiers 34*2606c877SAaron Ballman // is enabled. 35*2606c877SAaron Ballman // FIXME: These three should be handled the same way, and should be accepted 36*2606c877SAaron Ballman // when dollar signs are allowed in identifiers, rather than rejected, see 37*2606c877SAaron Ballman // GH87106. 38*2606c877SAaron Ballman M(\u0024) // expected-error {{character '$' cannot be specified by a universal character name}} 39*2606c877SAaron Ballman M(\U00000024) // expected-error {{character '$' cannot be specified by a universal character name}} 40*2606c877SAaron Ballman M($) 41*2606c877SAaron Ballman 42*2606c877SAaron Ballman // These should always be rejected because they're not valid identifier 43*2606c877SAaron Ballman // characters. 44*2606c877SAaron Ballman // FIXME: the diagnostic could be improved to make it clear this is an issue 45*2606c877SAaron Ballman // with forming an identifier rather than a UCN. 46*2606c877SAaron Ballman M(\u0040) // expected-error {{character '@' cannot be specified by a universal character name}} 47*2606c877SAaron Ballman M(\u0060) // expected-error {{character '`' cannot be specified by a universal character name}} 48*2606c877SAaron Ballman M(\U00000040) // expected-error {{character '@' cannot be specified by a universal character name}} 49*2606c877SAaron Ballman M(\U00000060) // expected-error {{character '`' cannot be specified by a universal character name}} 50*2606c877SAaron Ballman 51*2606c877SAaron Ballman // UCNs outside of identifiers are handled in Phase 5 of translation, so we 52*2606c877SAaron Ballman // cannot use the macro expansion to test their behavior. 53*2606c877SAaron Ballman 54*2606c877SAaron Ballman // This is outside of the range of values specified by ISO 10646. 55*2606c877SAaron Ballman const char *c1 = "\U00110000"; // expected-error {{invalid universal character}} 56*2606c877SAaron Ballman // This does not fall outside of the range 57*2606c877SAaron Ballman const char *c2 = "\U0010FFFF"; 58*2606c877SAaron Ballman 59*2606c877SAaron Ballman // These should always be accepted because they're a valid in a character 60*2606c877SAaron Ballman // constant. 61*2606c877SAaron Ballman int c3 = '\u0024'; 62*2606c877SAaron Ballman int c4 = '\u0040'; 63*2606c877SAaron Ballman int c5 = '\u0060'; 64*2606c877SAaron Ballman 65*2606c877SAaron Ballman int c6 = '\U00000024'; 66*2606c877SAaron Ballman int c7 = '\U00000040'; 67*2606c877SAaron Ballman int c8 = '\U00000060'; 68*2606c877SAaron Ballman 69*2606c877SAaron Ballman // Valid lone surrogates. 70*2606c877SAaron Ballman M(\uD799) 71*2606c877SAaron Ballman const char *c9 = "\U0000E000"; 72*2606c877SAaron Ballman 73*2606c877SAaron Ballman // Invalid lone surrogates, which are excluded explicitly by 6.4.3p2. 74*2606c877SAaron Ballman M(\uD800) // expected-error {{invalid universal character}} 75*2606c877SAaron Ballman const char *c10 = "\U0000DFFF"; // expected-error {{invalid universal character}} 76