1*c87b03e5Sespie /* Shared functions related to mangling names for the GNU compiler
2*c87b03e5Sespie for the Java(TM) language.
3*c87b03e5Sespie Copyright (C) 2001 Free Software Foundation, Inc.
4*c87b03e5Sespie
5*c87b03e5Sespie This file is part of GNU CC.
6*c87b03e5Sespie
7*c87b03e5Sespie GNU CC is free software; you can redistribute it and/or modify
8*c87b03e5Sespie it under the terms of the GNU General Public License as published by
9*c87b03e5Sespie the Free Software Foundation; either version 2, or (at your option)
10*c87b03e5Sespie any later version.
11*c87b03e5Sespie
12*c87b03e5Sespie GNU CC is distributed in the hope that it will be useful,
13*c87b03e5Sespie but WITHOUT ANY WARRANTY; without even the implied warranty of
14*c87b03e5Sespie MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15*c87b03e5Sespie GNU General Public License for more details.
16*c87b03e5Sespie
17*c87b03e5Sespie You should have received a copy of the GNU General Public License
18*c87b03e5Sespie along with GNU CC; see the file COPYING. If not, write to
19*c87b03e5Sespie the Free Software Foundation, 59 Temple Place - Suite 330,
20*c87b03e5Sespie Boston, MA 02111-1307, USA.
21*c87b03e5Sespie
22*c87b03e5Sespie Java and all Java-based marks are trademarks or registered trademarks
23*c87b03e5Sespie of Sun Microsystems, Inc. in the United States and other countries.
24*c87b03e5Sespie The Free Software Foundation is independent of Sun Microsystems, Inc. */
25*c87b03e5Sespie
26*c87b03e5Sespie /* Written by Alexandre Petit-Bianco <apbianco@cygnus.com> */
27*c87b03e5Sespie
28*c87b03e5Sespie #include "config.h"
29*c87b03e5Sespie #include "system.h"
30*c87b03e5Sespie #include "jcf.h"
31*c87b03e5Sespie #include "tree.h"
32*c87b03e5Sespie #include "java-tree.h"
33*c87b03e5Sespie #include "obstack.h"
34*c87b03e5Sespie #include "toplev.h"
35*c87b03e5Sespie
36*c87b03e5Sespie static void append_unicode_mangled_name PARAMS ((const char *, int));
37*c87b03e5Sespie #ifndef HAVE_AS_UTF8
38*c87b03e5Sespie static int unicode_mangling_length PARAMS ((const char *, int));
39*c87b03e5Sespie #endif
40*c87b03e5Sespie
41*c87b03e5Sespie extern struct obstack *mangle_obstack;
42*c87b03e5Sespie
43*c87b03e5Sespie /* If the assembler doesn't support UTF8 in symbol names, some
44*c87b03e5Sespie characters might need to be escaped. */
45*c87b03e5Sespie
46*c87b03e5Sespie #ifndef HAVE_AS_UTF8
47*c87b03e5Sespie
48*c87b03e5Sespie /* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
49*c87b03e5Sespie appropriately mangled (with Unicode escapes if needed) to
50*c87b03e5Sespie MANGLE_OBSTACK. Note that `java', `lang' and `Object' are used so
51*c87b03e5Sespie frequently that they could be cached. */
52*c87b03e5Sespie
53*c87b03e5Sespie void
append_gpp_mangled_name(name,len)54*c87b03e5Sespie append_gpp_mangled_name (name, len)
55*c87b03e5Sespie const char *name;
56*c87b03e5Sespie int len;
57*c87b03e5Sespie {
58*c87b03e5Sespie int encoded_len = unicode_mangling_length (name, len);
59*c87b03e5Sespie int needs_escapes = encoded_len > 0;
60*c87b03e5Sespie char buf[6];
61*c87b03e5Sespie
62*c87b03e5Sespie sprintf (buf, "%d", (needs_escapes ? encoded_len : len));
63*c87b03e5Sespie obstack_grow (mangle_obstack, buf, strlen (buf));
64*c87b03e5Sespie
65*c87b03e5Sespie if (needs_escapes)
66*c87b03e5Sespie append_unicode_mangled_name (name, len);
67*c87b03e5Sespie else
68*c87b03e5Sespie obstack_grow (mangle_obstack, name, len);
69*c87b03e5Sespie }
70*c87b03e5Sespie
71*c87b03e5Sespie /* Assuming (NAME, LEN) is a Utf8-encoded string, emit the string
72*c87b03e5Sespie appropriately mangled (with Unicode escapes) to MANGLE_OBSTACK.
73*c87b03e5Sespie Characters needing an escape are encoded `__UNN_' to `__UNNNN_', in
74*c87b03e5Sespie which case `__U' will be mangled `__U_'. */
75*c87b03e5Sespie
76*c87b03e5Sespie static void
append_unicode_mangled_name(name,len)77*c87b03e5Sespie append_unicode_mangled_name (name, len)
78*c87b03e5Sespie const char *name;
79*c87b03e5Sespie int len;
80*c87b03e5Sespie {
81*c87b03e5Sespie const unsigned char *ptr;
82*c87b03e5Sespie const unsigned char *limit = (const unsigned char *)name + len;
83*c87b03e5Sespie int uuU = 0;
84*c87b03e5Sespie for (ptr = (const unsigned char *) name; ptr < limit; )
85*c87b03e5Sespie {
86*c87b03e5Sespie int ch = UTF8_GET(ptr, limit);
87*c87b03e5Sespie
88*c87b03e5Sespie if ((ISALNUM (ch) && ch != 'U') || ch == '$')
89*c87b03e5Sespie obstack_1grow (mangle_obstack, ch);
90*c87b03e5Sespie /* Everything else needs encoding */
91*c87b03e5Sespie else
92*c87b03e5Sespie {
93*c87b03e5Sespie char buf [9];
94*c87b03e5Sespie if (ch == '_' || ch == 'U')
95*c87b03e5Sespie {
96*c87b03e5Sespie /* Prepare to recognize __U */
97*c87b03e5Sespie if (ch == '_' && (uuU < 3))
98*c87b03e5Sespie {
99*c87b03e5Sespie uuU++;
100*c87b03e5Sespie obstack_1grow (mangle_obstack, ch);
101*c87b03e5Sespie }
102*c87b03e5Sespie /* We recognize __U that we wish to encode
103*c87b03e5Sespie __U_. Finish the encoding. */
104*c87b03e5Sespie else if (ch == 'U' && (uuU == 2))
105*c87b03e5Sespie {
106*c87b03e5Sespie uuU = 0;
107*c87b03e5Sespie obstack_grow (mangle_obstack, "U_", 2);
108*c87b03e5Sespie }
109*c87b03e5Sespie /* Otherwise, just reset uuU and emit the character we
110*c87b03e5Sespie have. */
111*c87b03e5Sespie else
112*c87b03e5Sespie {
113*c87b03e5Sespie uuU = 0;
114*c87b03e5Sespie obstack_1grow (mangle_obstack, ch);
115*c87b03e5Sespie }
116*c87b03e5Sespie continue;
117*c87b03e5Sespie }
118*c87b03e5Sespie sprintf (buf, "__U%x_", ch);
119*c87b03e5Sespie obstack_grow (mangle_obstack, buf, strlen (buf));
120*c87b03e5Sespie uuU = 0;
121*c87b03e5Sespie }
122*c87b03e5Sespie }
123*c87b03e5Sespie }
124*c87b03e5Sespie
/* Assuming (NAME, LEN) is a Utf8-encoded string, compute how many
   characters its mangled form (a la g++, with Unicode escapes)
   occupies.  Return that count if at least one escape is required,
   and 0 if the name can be emitted unchanged.

   An invalid Utf8 sequence is reported through error (); scanning
   then carries on with the value UTF8_GET returned.  */

static int
unicode_mangling_length (name, len)
     const char *name;
     int len;
{
  const unsigned char *cursor = (const unsigned char *) name;
  const unsigned char *end = cursor + len;
  int escaped = 0;      /* Set once any escape has been counted.  */
  int total = 0;        /* Running length of the mangled text.  */
  int underscores = 0;  /* Consecutive `_' seen: 1 for `_', 2 for `__'.
                           Ordinary characters leave it untouched.  */

  while (cursor < end)
    {
      int ch = UTF8_GET (cursor, end);
      int hex_digits;

      if (ch < 0)
        error ("internal error - invalid Utf8 name");

      /* Plain identifier characters are copied through as is.  */
      if ((ISALNUM (ch) && ch != 'U') || ch == '$')
        {
          total++;
          continue;
        }

      /* An underscore is one character; remember it so that a
         following `U' can be recognized as completing `__U'.  */
      if (ch == '_')
        {
          total++;
          if (underscores < 3)
            underscores++;
          else
            underscores = 0;
          continue;
        }

      /* `U' is one character, or two when it completes `__U', which
         is written `__U_'.  */
      if (ch == 'U')
        {
          total++;
          if (underscores == 2)
            {
              total++;
              escaped = 1;
            }
          underscores = 0;
          continue;
        }

      /* Anything else is escaped: `__U' and `_' account for four
         characters, plus two, three or four hex digits.  */
      hex_digits = 2;
      if (ch > 0xff)
        hex_digits++;
      if (ch > 0xfff)
        hex_digits++;

      total += 4 + hex_digits;
      escaped = 1;
      underscores = 0;
    }

  return escaped ? total : 0;
}
191*c87b03e5Sespie
192*c87b03e5Sespie #else
193*c87b03e5Sespie
194*c87b03e5Sespie /* The assembler supports UTF8, we don't use escapes. Mangling is
195*c87b03e5Sespie simply <N>NAME. <N> is the number of UTF8 encoded characters that
196*c87b03e5Sespie are found in NAME. Note that `java', `lang' and `Object' are used
197*c87b03e5Sespie so frequently that they could be cached. */
198*c87b03e5Sespie
199*c87b03e5Sespie void
append_gpp_mangled_name(name,len)200*c87b03e5Sespie append_gpp_mangled_name (name, len)
201*c87b03e5Sespie const char *name;
202*c87b03e5Sespie int len;
203*c87b03e5Sespie {
204*c87b03e5Sespie const unsigned char *ptr;
205*c87b03e5Sespie const unsigned char *limit = (const unsigned char *)name + len;
206*c87b03e5Sespie int encoded_len;
207*c87b03e5Sespie char buf [6];
208*c87b03e5Sespie
209*c87b03e5Sespie /* Compute the length of the string we wish to mangle. */
210*c87b03e5Sespie for (encoded_len = 0, ptr = (const unsigned char *) name;
211*c87b03e5Sespie ptr < limit; encoded_len++)
212*c87b03e5Sespie {
213*c87b03e5Sespie int ch = UTF8_GET(ptr, limit);
214*c87b03e5Sespie
215*c87b03e5Sespie if (ch < 0)
216*c87b03e5Sespie error ("internal error - invalid Utf8 name");
217*c87b03e5Sespie }
218*c87b03e5Sespie
219*c87b03e5Sespie sprintf (buf, "%d", encoded_len);
220*c87b03e5Sespie obstack_grow (mangle_obstack, buf, strlen (buf));
221*c87b03e5Sespie obstack_grow (mangle_obstack, name, len);
222*c87b03e5Sespie }
223*c87b03e5Sespie
224*c87b03e5Sespie #endif /* HAVE_AS_UTF8 */
225