xref: /openbsd-src/gnu/usr.bin/gcc/gcc/java/mangle_name.c (revision c87b03e512fc05ed6e0222f6fb0ae86264b1d05b)
1 /* Shared functions related to mangling names for the GNU compiler
2    for the Java(TM) language.
3    Copyright (C) 2001 Free Software Foundation, Inc.
4 
5 This file is part of GNU CC.
6 
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11 
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING.  If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
21 
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc.  */
25 
26 /* Written by Alexandre Petit-Bianco <apbianco@cygnus.com> */
27 
28 #include "config.h"
29 #include "system.h"
30 #include "jcf.h"
31 #include "tree.h"
32 #include "java-tree.h"
33 #include "obstack.h"
34 #include "toplev.h"
35 
36 static void append_unicode_mangled_name PARAMS ((const char *, int));
37 #ifndef HAVE_AS_UTF8
38 static int  unicode_mangling_length PARAMS ((const char *, int));
39 #endif
40 
41 extern struct obstack *mangle_obstack;
42 
43 /* If the assembler doesn't support UTF8 in symbol names, some
44    characters might need to be escaped.  */
45 
46 #ifndef HAVE_AS_UTF8
47 
48 /* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
49    appropriately mangled (with Unicode escapes if needed) to
50    MANGLE_OBSTACK.  Note that `java', `lang' and `Object' are used so
51    frequently that they could be cached.  */
52 
53 void
append_gpp_mangled_name(name,len)54 append_gpp_mangled_name (name, len)
55      const char *name;
56      int len;
57 {
58   int encoded_len = unicode_mangling_length (name, len);
59   int needs_escapes = encoded_len > 0;
60   char buf[6];
61 
62   sprintf (buf, "%d", (needs_escapes ? encoded_len : len));
63   obstack_grow (mangle_obstack, buf, strlen (buf));
64 
65   if (needs_escapes)
66     append_unicode_mangled_name (name, len);
67   else
68     obstack_grow (mangle_obstack, name, len);
69 }
70 
71 /* Assuming (NAME, LEN) is a Utf8-encoded string, emit the string
72    appropriately mangled (with Unicode escapes) to MANGLE_OBSTACK.
73    Characters needing an escape are encoded `__UNN_' to `__UNNNN_', in
74    which case `__U' will be mangled `__U_'.  */
75 
76 static void
append_unicode_mangled_name(name,len)77 append_unicode_mangled_name (name, len)
78      const char *name;
79      int len;
80 {
81   const unsigned char *ptr;
82   const unsigned char *limit = (const unsigned char *)name + len;
83   int uuU = 0;
84   for (ptr = (const unsigned char *) name;  ptr < limit;  )
85     {
86       int ch = UTF8_GET(ptr, limit);
87 
88       if ((ISALNUM (ch) && ch != 'U') || ch == '$')
89 	obstack_1grow (mangle_obstack, ch);
90       /* Everything else needs encoding */
91       else
92 	{
93 	  char buf [9];
94 	  if (ch == '_' || ch == 'U')
95 	    {
96 	      /* Prepare to recognize __U */
97 	      if (ch == '_' && (uuU < 3))
98 		{
99 		  uuU++;
100 		  obstack_1grow (mangle_obstack, ch);
101 		}
102 	      /* We recognize __U that we wish to encode
103                  __U_. Finish the encoding. */
104 	      else if (ch == 'U' && (uuU == 2))
105 		{
106 		  uuU = 0;
107 		  obstack_grow (mangle_obstack, "U_", 2);
108 		}
109 	      /* Otherwise, just reset uuU and emit the character we
110                  have. */
111 	      else
112 		{
113 		  uuU = 0;
114 		  obstack_1grow (mangle_obstack, ch);
115 		}
116 	      continue;
117 	    }
118 	  sprintf (buf, "__U%x_", ch);
119 	  obstack_grow (mangle_obstack, buf, strlen (buf));
120 	  uuU = 0;
121 	}
122     }
123 }
124 
/* Assuming (NAME, LEN) is a Utf8-encoded string, compute how many
   characters its mangled form (a la g++, including Unicode escapes)
   occupies.  Return that count when at least one escape is required,
   and 0 when the name can be emitted without any escape.  */

static int
unicode_mangling_length (name, len)
     const char *name;
     int len;
{
  const unsigned char *cursor = (const unsigned char *) name;
  const unsigned char *end = (const unsigned char *) name + len;
  int escaped = 0;      /* Set once any escape has been counted.  */
  int total = 0;        /* Characters in the mangled name so far.  */
  int underscores = 0;  /* `_' counted toward a `__U' sequence.  */

  while (cursor < end)
    {
      int ch = UTF8_GET (cursor, end);

      if (ch < 0)
        error ("internal error - invalid Utf8 name");

      /* Characters that are copied verbatim: one character each.  */
      if (ch == '$' || (ch != 'U' && ISALNUM (ch)))
        {
          total++;
          continue;
        }

      if (ch == '_' || ch == 'U')
        {
          /* It's always at least one character.  */
          total++;

          if (ch == 'U' && underscores == 2)
            {
              /* `__U' is written `__U_': one more character, and the
                 name now counts as needing escapes.  */
              total++;
              escaped = 1;
              underscores = 0;
            }
          else if (ch == '_' && underscores < 3)
            underscores++;
          else
            underscores = 0;

          continue;
        }

      /* Anything else is escaped: four fixed characters (`__U' and
         the closing `_') plus two, three or four digits depending on
         the magnitude of the character.  */
      if (ch > 0xfff)
        total += 4 + 4;
      else if (ch > 0xff)
        total += 4 + 3;
      else
        total += 4 + 2;

      escaped = 1;
      underscores = 0;
    }

  return escaped ? total : 0;
}
191 
192 #else
193 
194 /* The assembler supports UTF8, we don't use escapes. Mangling is
195    simply <N>NAME. <N> is the number of UTF8 encoded characters that
196    are found in NAME. Note that `java', `lang' and `Object' are used
197    so frequently that they could be cached.  */
198 
199 void
append_gpp_mangled_name(name,len)200 append_gpp_mangled_name (name, len)
201      const char *name;
202      int len;
203 {
204   const unsigned char *ptr;
205   const unsigned char *limit = (const unsigned char *)name + len;
206   int encoded_len;
207   char buf [6];
208 
209   /* Compute the length of the string we wish to mangle. */
210   for (encoded_len =  0, ptr = (const unsigned char *) name;
211        ptr < limit; encoded_len++)
212     {
213       int ch = UTF8_GET(ptr, limit);
214 
215       if (ch < 0)
216 	error ("internal error - invalid Utf8 name");
217     }
218 
219   sprintf (buf, "%d", encoded_len);
220   obstack_grow (mangle_obstack, buf, strlen (buf));
221   obstack_grow (mangle_obstack, name, len);
222 }
223 
224 #endif /* HAVE_AS_UTF8 */
225