xref: /llvm-project/clang/test/C/C99/n717.c (revision 2606c87788153bf33d854fa5c3a03e16d544c5d7)
1*2606c877SAaron Ballman // RUN: %clang_cc1 -verify -std=c99 %s
2*2606c877SAaron Ballman // RUN: %clang_cc1 -verify -std=c99 -fno-dollars-in-identifiers %s
3*2606c877SAaron Ballman 
4*2606c877SAaron Ballman /* WG14 N717: Clang 17
5*2606c877SAaron Ballman  * Extended identifiers
6*2606c877SAaron Ballman  */
7*2606c877SAaron Ballman 
8*2606c877SAaron Ballman // Used as a sink for UCNs.
9*2606c877SAaron Ballman #define M(arg)
10*2606c877SAaron Ballman 
11*2606c877SAaron Ballman // C99 6.4.3p1 specifies the grammar for UCNs. A \u must be followed by exactly
12*2606c877SAaron Ballman // four hex digits, and \U must be followed by exactly eight.
13*2606c877SAaron Ballman M(\u1)    // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
14*2606c877SAaron Ballman M(\u12)   // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
15*2606c877SAaron Ballman M(\u123)  // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
16*2606c877SAaron Ballman M(\u1234) // Okay
17*2606c877SAaron Ballman M(\u12345)// Okay, two tokens (UCN followed by 5)
18*2606c877SAaron Ballman 
19*2606c877SAaron Ballman M(\U1)         // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
20*2606c877SAaron Ballman M(\U12)        // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
21*2606c877SAaron Ballman M(\U123)       // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
22*2606c877SAaron Ballman M(\U1234)      // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} \
23*2606c877SAaron Ballman                   expected-note {{did you mean to use '\u'?}}
24*2606c877SAaron Ballman M(\U12345)     // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
25*2606c877SAaron Ballman M(\U123456)    // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
26*2606c877SAaron Ballman M(\U1234567)   // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
27*2606c877SAaron Ballman M(\U12345678)  // Okay
28*2606c877SAaron Ballman M(\U123456789) // Okay-ish, two tokens (valid-per-spec-but-actually-invalid UCN followed by 9)
29*2606c877SAaron Ballman 
30*2606c877SAaron Ballman // Now test the ones that should work. Note, these work in C17 and earlier but
31*2606c877SAaron Ballman // are part of the basic character set in C23 and thus should be diagnosed in
32*2606c877SAaron Ballman // that mode. They're valid in a character constant, but not valid in an
33*2606c877SAaron Ballman // identifier, except for U+0024 which is allowed if -fdollars-in-identifiers
34*2606c877SAaron Ballman // is enabled.
35*2606c877SAaron Ballman // FIXME: These three should be handled the same way, and should be accepted
36*2606c877SAaron Ballman // when dollar signs are allowed in identifiers, rather than rejected, see
37*2606c877SAaron Ballman // GH87106.
38*2606c877SAaron Ballman M(\u0024) // expected-error {{character '$' cannot be specified by a universal character name}}
39*2606c877SAaron Ballman M(\U00000024) // expected-error {{character '$' cannot be specified by a universal character name}}
40*2606c877SAaron Ballman M($)
41*2606c877SAaron Ballman 
42*2606c877SAaron Ballman // These should always be rejected because they're not valid identifier
43*2606c877SAaron Ballman // characters.
44*2606c877SAaron Ballman // FIXME: the diagnostic could be improved to make it clear this is an issue
45*2606c877SAaron Ballman // with forming an identifier rather than a UCN.
46*2606c877SAaron Ballman M(\u0040) // expected-error {{character '@' cannot be specified by a universal character name}}
47*2606c877SAaron Ballman M(\u0060) // expected-error {{character '`' cannot be specified by a universal character name}}
48*2606c877SAaron Ballman M(\U00000040) // expected-error {{character '@' cannot be specified by a universal character name}}
49*2606c877SAaron Ballman M(\U00000060) // expected-error {{character '`' cannot be specified by a universal character name}}
50*2606c877SAaron Ballman 
51*2606c877SAaron Ballman // UCNs outside of identifiers are handled in Phase 5 of translation, so we
52*2606c877SAaron Ballman // cannot use the macro expansion to test their behavior.
53*2606c877SAaron Ballman 
54*2606c877SAaron Ballman // This is outside of the range of values specified by ISO 10646.
55*2606c877SAaron Ballman const char *c1 = "\U00110000"; // expected-error {{invalid universal character}}
56*2606c877SAaron Ballman // This is within the range (U+10FFFF is its upper bound), so it is accepted.
57*2606c877SAaron Ballman const char *c2 = "\U0010FFFF";
58*2606c877SAaron Ballman 
59*2606c877SAaron Ballman // These should always be accepted because they're valid in a character
60*2606c877SAaron Ballman // constant.
61*2606c877SAaron Ballman int c3 = '\u0024';
62*2606c877SAaron Ballman int c4 = '\u0040';
63*2606c877SAaron Ballman int c5 = '\u0060';
64*2606c877SAaron Ballman 
65*2606c877SAaron Ballman int c6 = '\U00000024';
66*2606c877SAaron Ballman int c7 = '\U00000040';
67*2606c877SAaron Ballman int c8 = '\U00000060';
68*2606c877SAaron Ballman 
69*2606c877SAaron Ballman // Valid code points that lie outside the surrogate range (U+D800-U+DFFF).
70*2606c877SAaron Ballman M(\uD799)
71*2606c877SAaron Ballman const char *c9 = "\U0000E000";
72*2606c877SAaron Ballman 
73*2606c877SAaron Ballman // Invalid lone surrogates, which are excluded explicitly by 6.4.3p2.
74*2606c877SAaron Ballman M(\uD800) // expected-error {{invalid universal character}}
75*2606c877SAaron Ballman const char *c10  = "\U0000DFFF"; // expected-error {{invalid universal character}}
76