xref: /netbsd-src/external/gpl2/gettext/dist/gettext-tools/libuniname/test-names.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /* Test the Unicode character name functions.
2    Copyright (C) 2000-2003, 2005 Free Software Foundation, Inc.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 2, or (at your option)
7    any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software Foundation,
16    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
17 
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21 
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 
26 #include "exit.h"
27 #include "xalloc.h"
28 #include "uniname.h"
29 
/* Reference table of character names, indexed by Unicode code point.
   It is filled from a file in UnicodeData.txt format that has been extended
   with the Hangul syllable names, as described in the Unicode 3.0 book.
   An entry that is NULL means that no name is recorded for that code
   point.  */
const char *unicode_names[0x110000];
33 
/* Size, in bytes, of the buffers that receive one field of the
   UnicodeData.txt file.  */
#define FIELDLEN 120

/* Reads one field from STREAM into BUFFER, which must have room for
   FIELDLEN bytes.  Characters are consumed up to and including DELIM; the
   delimiter itself is not stored and BUFFER is NUL-terminated.

   Returns 1 if DELIM was found.  Returns 0 if end of file (or a read error)
   was reached first; BUFFER is then left unterminated.  A field that does
   not fit prints "field too long" on stderr and terminates the program.  */
static int
getfield (FILE *stream, char *buffer, int delim)
{
  char *out = buffer;
  int stored = 0;
  int ch;

  while ((ch = getc (stream)) != EOF)
    {
      if (ch == delim)
        {
          /* End of field: terminate the string and report success.  */
          *out = '\0';
          return 1;
        }

      /* Refuse the character if it would not leave room in the buffer.  */
      stored++;
      if (stored >= FIELDLEN - 1)
        {
          fprintf (stderr, "field too long\n");
          exit (EXIT_FAILURE);
        }
      *out++ = ch;
    }

  /* Input ended before the delimiter was seen.  */
  return 0;
}
63 
64 /* Stores in unicode_names[] the relevant contents of the UnicodeData.txt
65    file.  */
66 static void
fill_names(const char * unicodedata_filename)67 fill_names (const char *unicodedata_filename)
68 {
69   unsigned int i;
70   FILE *stream;
71   char field0[FIELDLEN];
72   char field1[FIELDLEN];
73   int lineno = 0;
74 
75   for (i = 0; i < 0x110000; i++)
76     unicode_names[i] = NULL;
77 
78   stream = fopen (unicodedata_filename, "r");
79   if (stream == NULL)
80     {
81       fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
82       exit (EXIT_FAILURE);
83     }
84 
85   for (;;)
86     {
87       int n;
88       int c;
89 
90       lineno++;
91       n = getfield (stream, field0, ';');
92       n += getfield (stream, field1, ';');
93       if (n == 0)
94 	break;
95       if (n != 2)
96 	{
97 	  fprintf (stderr, "short line in '%s':%d\n",
98 		   unicodedata_filename, lineno);
99 	  exit (EXIT_FAILURE);
100 	}
101       for (; (c = getc (stream)), (c != EOF && c != '\n'); )
102 	;
103       i = strtoul (field0, NULL, 16);
104       if (i >= 0x110000)
105 	{
106 	  fprintf (stderr, "index too large\n");
107 	  exit (EXIT_FAILURE);
108 	}
109       unicode_names[i] = xstrdup (field1);
110     }
111   if (ferror (stream) || fclose (stream))
112     {
113       fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
114       exit (1);
115     }
116 }
117 
118 /* Perform an exhaustive test of the unicode_character_name function.  */
119 static int
test_name_lookup()120 test_name_lookup ()
121 {
122   int error = 0;
123   unsigned int i;
124   char buf[UNINAME_MAX];
125 
126   for (i = 0; i < 0x11000; i++)
127     {
128       char *result = unicode_character_name (i, buf);
129 
130       if (unicode_names[i] != NULL)
131 	{
132 	  if (result == NULL)
133 	    {
134 	      fprintf (stderr, "\\u%04X name lookup failed!\n", i);
135 	      error = 1;
136 	    }
137 	  else if (strcmp (result, unicode_names[i]) != 0)
138 	    {
139 	      fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
140 			       i, result);
141 	      error = 1;
142 	    }
143 	}
144       else
145 	{
146 	  if (result != NULL)
147 	    {
148 	      fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
149 			       i, result);
150 	      error = 1;
151 	    }
152 	}
153     }
154 
155   for (i = 0x110000; i < 0x1000000; i++)
156     {
157       char *result = unicode_character_name (i, buf);
158 
159       if (result != NULL)
160 	{
161 	  fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
162 			   i, result);
163 	  error = 1;
164 	}
165     }
166 
167   return error;
168 }
169 
170 /* Perform a test of the unicode_name_character function.  */
171 static int
test_inverse_lookup()172 test_inverse_lookup ()
173 {
174   int error = 0;
175   unsigned int i;
176 
177   /* First, verify all valid character names are recognized.  */
178   for (i = 0; i < 0x110000; i++)
179     if (unicode_names[i] != NULL)
180       {
181 	unsigned int result = unicode_name_character (unicode_names[i]);
182 	if (result != i)
183 	  {
184 	    if (result == UNINAME_INVALID)
185 	      fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
186 		       unicode_names[i]);
187 	    else
188 	      fprintf (stderr,
189 		       "inverse name lookup of \"%s\" returned 0x%04X\n",
190 		       unicode_names[i], result);
191 	    error = 1;
192 	  }
193       }
194 
195   /* Second, generate random but likely names and verify they are not
196      recognized unless really valid.  */
197   for (i = 0; i < 10000; i++)
198     {
199       unsigned int i1, i2;
200       const char *s1;
201       const char *s2;
202       unsigned int l1, l2, j1, j2;
203       char buf[2*UNINAME_MAX];
204       unsigned int result;
205 
206       do i1 = ((rand () % 0x11) << 16)
207 	      + ((rand () & 0xff) << 8)
208 	      + (rand () & 0xff);
209       while (unicode_names[i1] == NULL);
210 
211       do i2 = ((rand () % 0x11) << 16)
212 	      + ((rand () & 0xff) << 8)
213 	      + (rand () & 0xff);
214       while (unicode_names[i2] == NULL);
215 
216       s1 = unicode_names[i1];
217       l1 = strlen (s1);
218       s2 = unicode_names[i2];
219       l2 = strlen (s2);
220 
221       /* Concatenate a starting piece of s1 with an ending piece of s2.  */
222       for (j1 = 1; j1 <= l1; j1++)
223 	if (j1 == l1 || s1[j1] == ' ')
224 	  for (j2 = 0; j2 < l2; j2++)
225 	    if (j2 == 0 || s2[j2-1] == ' ')
226 	      {
227 		memcpy (buf, s1, j1);
228 		buf[j1] = ' ';
229 		memcpy (buf + j1 + 1, s2 + j2, l2 - j2 + 1);
230 
231 		result = unicode_name_character (buf);
232 		if (result != UNINAME_INVALID
233 		    && !(unicode_names[result] != NULL
234 			 && strcmp (unicode_names[result], buf) == 0))
235 		  {
236 		    fprintf (stderr,
237 			     "inverse name lookup of \"%s\" returned 0x%04X\n",
238 			     unicode_names[i], result);
239 		    error = 1;
240 		  }
241 	      }
242     }
243 
244   /* Third, some extreme case that used to loop.  */
245   if (unicode_name_character ("A A") != UNINAME_INVALID)
246     error = 1;
247 
248   return error;
249 }
250 
/* Runs the character name tests.

   argv[1] is the name of the UnicodeData.txt-format file from which the
   expected names are loaded.  Returns 0 if all tests passed and 1 if any
   test failed; returns EXIT_FAILURE after printing a usage message if the
   file name argument is missing.  */
int
main (int argc, char *argv[])
{
  int error = 0;

  /* Without this check a missing argument would hand NULL to fopen.  */
  if (argc < 2)
    {
      fprintf (stderr, "usage: %s UNICODEDATA_FILE\n",
               argc > 0 ? argv[0] : "test-names");
      return EXIT_FAILURE;
    }

  fill_names (argv[1]);

  error |= test_name_lookup ();
  error |= test_inverse_lookup ();

  return error;
}
263