1 /* Test the Unicode character name functions.
2 Copyright (C) 2000-2003, 2005 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25
26 #include "exit.h"
27 #include "xalloc.h"
28 #include "uniname.h"
29
/* The names according to the UnicodeData.txt file, modified to contain the
   Hangul syllable names, as described in the Unicode 3.0 book.
   The table is indexed by code point (0 .. 0x10FFFF).  fill_names() resets
   every entry to NULL and then stores a heap-allocated copy of the name for
   each code point listed in the data file; an entry that is still NULL
   means the data file has no name for that code point.  */
const char * unicode_names [0x110000];
33
/* Maximum length of a field in the UnicodeData.txt file.  */
#define FIELDLEN 120

/* Extracts one field from STREAM into BUFFER, whose size is FIELDLEN.
   Characters are consumed up to and including DELIM; DELIM itself is not
   stored.  On success BUFFER is NUL-terminated and 1 is returned.
   If end of file (or a read error) is hit before DELIM is seen, 0 is
   returned and BUFFER is left unterminated.
   A field that does not fit terminates the program with an error message.  */
static int
getfield (FILE *stream, char *buffer, int delim)
{
  char *out = buffer;
  int stored = 0;

  for (;;)
    {
      int ch = getc (stream);

      if (ch == EOF)
        return 0;
      if (ch == delim)
        break;

      /* Refuse the character if it would overflow the field limit.  */
      stored++;
      if (stored >= FIELDLEN - 1)
        {
          fprintf (stderr, "field too long\n");
          exit (EXIT_FAILURE);
        }
      *out = ch;
      out++;
    }

  *out = '\0';
  return 1;
}
63
64 /* Stores in unicode_names[] the relevant contents of the UnicodeData.txt
65 file. */
66 static void
fill_names(const char * unicodedata_filename)67 fill_names (const char *unicodedata_filename)
68 {
69 unsigned int i;
70 FILE *stream;
71 char field0[FIELDLEN];
72 char field1[FIELDLEN];
73 int lineno = 0;
74
75 for (i = 0; i < 0x110000; i++)
76 unicode_names[i] = NULL;
77
78 stream = fopen (unicodedata_filename, "r");
79 if (stream == NULL)
80 {
81 fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
82 exit (EXIT_FAILURE);
83 }
84
85 for (;;)
86 {
87 int n;
88 int c;
89
90 lineno++;
91 n = getfield (stream, field0, ';');
92 n += getfield (stream, field1, ';');
93 if (n == 0)
94 break;
95 if (n != 2)
96 {
97 fprintf (stderr, "short line in '%s':%d\n",
98 unicodedata_filename, lineno);
99 exit (EXIT_FAILURE);
100 }
101 for (; (c = getc (stream)), (c != EOF && c != '\n'); )
102 ;
103 i = strtoul (field0, NULL, 16);
104 if (i >= 0x110000)
105 {
106 fprintf (stderr, "index too large\n");
107 exit (EXIT_FAILURE);
108 }
109 unicode_names[i] = xstrdup (field1);
110 }
111 if (ferror (stream) || fclose (stream))
112 {
113 fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
114 exit (1);
115 }
116 }
117
118 /* Perform an exhaustive test of the unicode_character_name function. */
119 static int
test_name_lookup()120 test_name_lookup ()
121 {
122 int error = 0;
123 unsigned int i;
124 char buf[UNINAME_MAX];
125
126 for (i = 0; i < 0x11000; i++)
127 {
128 char *result = unicode_character_name (i, buf);
129
130 if (unicode_names[i] != NULL)
131 {
132 if (result == NULL)
133 {
134 fprintf (stderr, "\\u%04X name lookup failed!\n", i);
135 error = 1;
136 }
137 else if (strcmp (result, unicode_names[i]) != 0)
138 {
139 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
140 i, result);
141 error = 1;
142 }
143 }
144 else
145 {
146 if (result != NULL)
147 {
148 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
149 i, result);
150 error = 1;
151 }
152 }
153 }
154
155 for (i = 0x110000; i < 0x1000000; i++)
156 {
157 char *result = unicode_character_name (i, buf);
158
159 if (result != NULL)
160 {
161 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
162 i, result);
163 error = 1;
164 }
165 }
166
167 return error;
168 }
169
170 /* Perform a test of the unicode_name_character function. */
171 static int
test_inverse_lookup()172 test_inverse_lookup ()
173 {
174 int error = 0;
175 unsigned int i;
176
177 /* First, verify all valid character names are recognized. */
178 for (i = 0; i < 0x110000; i++)
179 if (unicode_names[i] != NULL)
180 {
181 unsigned int result = unicode_name_character (unicode_names[i]);
182 if (result != i)
183 {
184 if (result == UNINAME_INVALID)
185 fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
186 unicode_names[i]);
187 else
188 fprintf (stderr,
189 "inverse name lookup of \"%s\" returned 0x%04X\n",
190 unicode_names[i], result);
191 error = 1;
192 }
193 }
194
195 /* Second, generate random but likely names and verify they are not
196 recognized unless really valid. */
197 for (i = 0; i < 10000; i++)
198 {
199 unsigned int i1, i2;
200 const char *s1;
201 const char *s2;
202 unsigned int l1, l2, j1, j2;
203 char buf[2*UNINAME_MAX];
204 unsigned int result;
205
206 do i1 = ((rand () % 0x11) << 16)
207 + ((rand () & 0xff) << 8)
208 + (rand () & 0xff);
209 while (unicode_names[i1] == NULL);
210
211 do i2 = ((rand () % 0x11) << 16)
212 + ((rand () & 0xff) << 8)
213 + (rand () & 0xff);
214 while (unicode_names[i2] == NULL);
215
216 s1 = unicode_names[i1];
217 l1 = strlen (s1);
218 s2 = unicode_names[i2];
219 l2 = strlen (s2);
220
221 /* Concatenate a starting piece of s1 with an ending piece of s2. */
222 for (j1 = 1; j1 <= l1; j1++)
223 if (j1 == l1 || s1[j1] == ' ')
224 for (j2 = 0; j2 < l2; j2++)
225 if (j2 == 0 || s2[j2-1] == ' ')
226 {
227 memcpy (buf, s1, j1);
228 buf[j1] = ' ';
229 memcpy (buf + j1 + 1, s2 + j2, l2 - j2 + 1);
230
231 result = unicode_name_character (buf);
232 if (result != UNINAME_INVALID
233 && !(unicode_names[result] != NULL
234 && strcmp (unicode_names[result], buf) == 0))
235 {
236 fprintf (stderr,
237 "inverse name lookup of \"%s\" returned 0x%04X\n",
238 unicode_names[i], result);
239 error = 1;
240 }
241 }
242 }
243
244 /* Third, some extreme case that used to loop. */
245 if (unicode_name_character ("A A") != UNINAME_INVALID)
246 error = 1;
247
248 return error;
249 }
250
/* Entry point.  argv[1] names the UnicodeData.txt-style file whose names
   are loaded with fill_names() and then used as the reference for
   test_name_lookup() and test_inverse_lookup().
   Returns 0 if both tests passed and 1 if either failed; exits with
   EXIT_FAILURE if the file name argument is missing.  */
int
main (int argc, char *argv[])
{
  int error = 0;

  /* Without this check a missing argument would hand NULL to fopen.  */
  if (argc < 2)
    {
      fprintf (stderr, "missing argument: name of the UnicodeData.txt file\n");
      exit (EXIT_FAILURE);
    }

  fill_names (argv[1]);

  error |= test_name_lookup ();
  error |= test_inverse_lookup ();

  return error;
}
263