xref: /llvm-project/clang/test/Lexer/unicode.c (revision c9ab1d890586bd8a6a194e6a37968538b80f81bd)
1 // RUN: %clang_cc1 -fsyntax-only -verify -x c -std=c11 %s
2 // RUN: %clang_cc1 -fsyntax-only -verify=expected,c2x -x c -std=c2x %s
3 // RUN: %clang_cc1 -fsyntax-only -verify=expected,cxx -x c++ -std=c++11 %s
4 // RUN: %clang_cc1 -std=c99 -E -DPP_ONLY=1 %s | FileCheck %s --strict-whitespace
5 // RUN: %clang_cc1 -E -DPP_ONLY=1 %s | FileCheck %s --strict-whitespace
6 // UNSUPPORTED: system-zos
7 
8 // This file contains Unicode characters; please do not "fix" them!
9 
10 extern int x; // expected-warning {{treating Unicode character as whitespace}}
11 extern int x; // expected-warning {{treating Unicode character as whitespace}}
12 
13 // CHECK: extern int {{x}}
14 // CHECK: extern int {{x}}
15 
// A non-ASCII character in a pragma is expected to produce no diagnostic.
16 #pragma mark ¡Unicode!
17 
// COPYRIGHT's replacement list contains a raw copyright sign. STR() forwards
// to XSTR() so that its argument is macro-expanded before '#' stringifies it.
18 #define COPYRIGHT Copyright © 2012
19 #define XSTR(X) #X
20 #define STR(X) XSTR(X)
21 
// The stringified expansion becomes a string literal; the preprocessed output
// is matched against the literal text on the line after the declaration.
22 static const char *copyright = STR(COPYRIGHT); // no-warning
23 // CHECK: static const char *copyright = "Copyright © {{2012}}";
24 
// Active only in the preprocess-only invocations, which pass -DPP_ONLY=1.
// Inside, the bare macro name and the free-text line are lexed as ordinary
// tokens, so the copyright sign goes through the preprocessor unquoted.
25 #if PP_ONLY
26 COPYRIGHT
27 // CHECK: Copyright © {{2012}}
// NOTE(review): with a space before the colon, the next line is presumably not
// picked up by FileCheck as a directive of its own; confirm that is intended.
28 CHECK : The preprocessor should not complain about Unicode characters like ©.
29 #endif
30 
31 int a;
32 
// 0xAAAAAAAA is not a valid code point, so the universal character name is
// rejected both inside an identifier and inside a character literal.
33 extern int X\UAAAAAAAA; // expected-error {{not allowed in an identifier}}
34 int Y = '\UAAAAAAAA'; // expected-error {{invalid universal character}}
35 
36 #if defined(__cplusplus) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L)
37 
38 extern int ༀ;
39 extern int ��;
40 extern int ��;
41 extern int ꙮ;
// Identifiers spelled with universal character names, a delimited escape and
// a named-character escape. The trailing comments name each character and the
// Unicode version that added it.
42 extern int \u1B4C;     // BALINESE LETTER ARCHAIC JNYA - Added in Unicode 14
43 extern int \U00016AA2; // TANGSA LETTER GA - Added in Unicode 14
44 extern int \U0001E4D0; // 𞓐 NAG MUNDARI LETTER O - Added in Unicode 15
45 extern int \u{2EBF0}; // CJK UNIFIED IDEOGRAPH-2EBF0 - Added in Unicode 15.1
46 extern int a\N{TANGSA LETTER GA};
// Same character name with the spaces removed: it is diagnosed as an invalid
// name, and the declaration then fails to parse.
47 extern int a\N{TANGSALETTERGA}; // expected-error {{'TANGSALETTERGA' is not a valid Unicode character name}} \
48                                 // expected-error {{expected ';' after top level declarator}} \
49                                 // expected-note {{characters names in Unicode escape sequences are sensitive to case and whitespace}}
50 
// U+1D6DB is accepted as an identifier with an extension warning, whereas
// U+2089 is rejected at the start of an identifier, which leaves a declaration
// that declares nothing.
51 extern int 𝛛; // expected-warning {{mathematical notation character <U+1D6DB> in an identifier is a Clang extension}}
52 extern int ₉; // expected-error {{character <U+2089> not allowed at the start of an identifier}} \\
53                  expected-warning {{declaration does not declare anything}}
54 
// Six mathematical-notation characters inside one identifier: one warning for
// each of them.
55 int a¹b₍₄₂₎∇; // expected-warning 6{{mathematical notation character}}
56 
// The same kind of character written as a delimited escape or as a named
// escape is expected to give the same warning.
57 int \u{221E} = 1; // expected-warning {{mathematical notation character}}
58 int \N{MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL} = 1;
59                  // expected-warning@-1 {{mathematical notation character}}
60 
61 int a\N{SUBSCRIPT EQUALS SIGN} = 1; // expected-warning {{mathematical notation character}}
62 
63 // This character doesn't have the XID_Start property
// The parse error is worded differently per language, so the C++ run and the
// C2x run each carry their own expectation.
64 extern int  \U00016AC0; // TANGSA DIGIT ZERO  // cxx-error {{expected unqualified-id}} \
65                                               // c2x-error {{expected identifier or '('}}
66 
67 extern int ��; // expected-error {{unexpected character <U+1F339>}} \
68                   expected-warning {{declaration does not declare anything}}
69 
70 extern int ��;   // MOOSE (Unicode 15) \
71                 // expected-error {{unexpected character <U+1FACE>}} \
72                    expected-warning {{declaration does not declare anything}}
73 
74 extern int ��; // expected-error {{unexpected character <U+1F477>}} \
75                   expected-warning {{declaration does not declare anything}}
76 
77 extern int ��‍♀; // expected-warning {{declaration does not declare anything}} \
78                   expected-error {{unexpected character <U+1F477>}} \
79                   expected-error {{character <U+200D> not allowed at the start of an identifier}} \
80                   expected-error {{unexpected character <U+2640>}}
81 #else
82 
83 // A �� by any other name....
84 extern int ��;
85 int ��(int ��) { return ��+ 1; }
main(void)86 int main (void) {
87   int �� = ��(��);
88   return ��;
89 }
90 
91 int n; = 3; // expected-warning {{treating Unicode character <U+037E> as an identifier character rather than as ';' symbol}}
92 int *n꞉꞉v = &n;; // expected-warning 2{{treating Unicode character <U+A789> as an identifier character rather than as ':' symbol}}
93                  // expected-warning@-1 {{treating Unicode character <U+037E> as an identifier character rather than as ';' symbol}}
94 int v=[=](auto){returnx;}(); // expected-warning 12{{treating Unicode character}}
95 
96 intxx‍;
97 // expected-warning@-1 {{identifier contains Unicode character <U+2060> that is invisible in some environments}}
98 // expected-warning@-2 {{identifier contains Unicode character <U+FEFF> that is invisible in some environments}}
99 // expected-warning@-3 {{identifier contains Unicode character <U+200D> that is invisible in some environments}}
100 int foobar = 0; // expected-warning {{identifier contains Unicode character <U+200B> that is invisible in some environments}}
101 int x = foobar; // expected-error {{undeclared identifier}}
102 
103 intfoo; // expected-error {{unexpected character <U+2223>}}
104 #ifndef PP_ONLY
105 #definex // expected-error {{macro name must be an identifier}}
106 #endif
107 
108 #endif
109