xref: /netbsd-src/external/gpl2/gettext/dist/gettext-tools/libuniname/test-names.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1*946379e7Schristos /* Test the Unicode character name functions.
2*946379e7Schristos    Copyright (C) 2000-2003, 2005 Free Software Foundation, Inc.
3*946379e7Schristos 
4*946379e7Schristos    This program is free software; you can redistribute it and/or modify
5*946379e7Schristos    it under the terms of the GNU General Public License as published by
6*946379e7Schristos    the Free Software Foundation; either version 2, or (at your option)
7*946379e7Schristos    any later version.
8*946379e7Schristos 
9*946379e7Schristos    This program is distributed in the hope that it will be useful,
10*946379e7Schristos    but WITHOUT ANY WARRANTY; without even the implied warranty of
11*946379e7Schristos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12*946379e7Schristos    GNU General Public License for more details.
13*946379e7Schristos 
14*946379e7Schristos    You should have received a copy of the GNU General Public License
15*946379e7Schristos    along with this program; if not, write to the Free Software Foundation,
16*946379e7Schristos    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
17*946379e7Schristos 
18*946379e7Schristos #ifdef HAVE_CONFIG_H
19*946379e7Schristos # include <config.h>
20*946379e7Schristos #endif
21*946379e7Schristos 
22*946379e7Schristos #include <stdio.h>
23*946379e7Schristos #include <stdlib.h>
24*946379e7Schristos #include <string.h>
25*946379e7Schristos 
26*946379e7Schristos #include "exit.h"
27*946379e7Schristos #include "xalloc.h"
28*946379e7Schristos #include "uniname.h"
29*946379e7Schristos 
/* Table of character names, indexed by Unicode code point.  The contents
   come from the UnicodeData.txt file, modified to contain the Hangul syllable
   names, as described in the Unicode 3.0 book.  An entry is NULL when no name
   is recorded for that code point.  */
const char *unicode_names[0x110000];
33*946379e7Schristos 
/* Upper bound on the size of one field of the UnicodeData.txt file.  */
#define FIELDLEN 120

/* Reads one field from STREAM into BUFFER, which must provide room for
   FIELDLEN bytes.  Characters are consumed up to and including DELIM; DELIM
   itself is not stored.  A field that does not fit is a fatal error: a
   message is printed on stderr and the program exits with EXIT_FAILURE.
   Returns 1 if a DELIM-terminated field was read (BUFFER is then
   NUL-terminated), or 0 if end of input was reached first (BUFFER is then
   left without a terminator).  */
static int
getfield (FILE *stream, char *buffer, int delim)
{
  char *out = buffer;
  int stored = 0;

  for (;;)
    {
      int ch = getc (stream);

      if (ch == EOF)
        return 0;
      if (ch == delim)
        break;

      /* Refuse the character if it would leave no room for the NUL.  */
      stored++;
      if (stored >= FIELDLEN - 1)
        {
          fprintf (stderr, "field too long\n");
          exit (EXIT_FAILURE);
        }
      *out++ = ch;
    }

  *out = '\0';
  return 1;
}
63*946379e7Schristos 
64*946379e7Schristos /* Stores in unicode_names[] the relevant contents of the UnicodeData.txt
65*946379e7Schristos    file.  */
66*946379e7Schristos static void
fill_names(const char * unicodedata_filename)67*946379e7Schristos fill_names (const char *unicodedata_filename)
68*946379e7Schristos {
69*946379e7Schristos   unsigned int i;
70*946379e7Schristos   FILE *stream;
71*946379e7Schristos   char field0[FIELDLEN];
72*946379e7Schristos   char field1[FIELDLEN];
73*946379e7Schristos   int lineno = 0;
74*946379e7Schristos 
75*946379e7Schristos   for (i = 0; i < 0x110000; i++)
76*946379e7Schristos     unicode_names[i] = NULL;
77*946379e7Schristos 
78*946379e7Schristos   stream = fopen (unicodedata_filename, "r");
79*946379e7Schristos   if (stream == NULL)
80*946379e7Schristos     {
81*946379e7Schristos       fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
82*946379e7Schristos       exit (EXIT_FAILURE);
83*946379e7Schristos     }
84*946379e7Schristos 
85*946379e7Schristos   for (;;)
86*946379e7Schristos     {
87*946379e7Schristos       int n;
88*946379e7Schristos       int c;
89*946379e7Schristos 
90*946379e7Schristos       lineno++;
91*946379e7Schristos       n = getfield (stream, field0, ';');
92*946379e7Schristos       n += getfield (stream, field1, ';');
93*946379e7Schristos       if (n == 0)
94*946379e7Schristos 	break;
95*946379e7Schristos       if (n != 2)
96*946379e7Schristos 	{
97*946379e7Schristos 	  fprintf (stderr, "short line in '%s':%d\n",
98*946379e7Schristos 		   unicodedata_filename, lineno);
99*946379e7Schristos 	  exit (EXIT_FAILURE);
100*946379e7Schristos 	}
101*946379e7Schristos       for (; (c = getc (stream)), (c != EOF && c != '\n'); )
102*946379e7Schristos 	;
103*946379e7Schristos       i = strtoul (field0, NULL, 16);
104*946379e7Schristos       if (i >= 0x110000)
105*946379e7Schristos 	{
106*946379e7Schristos 	  fprintf (stderr, "index too large\n");
107*946379e7Schristos 	  exit (EXIT_FAILURE);
108*946379e7Schristos 	}
109*946379e7Schristos       unicode_names[i] = xstrdup (field1);
110*946379e7Schristos     }
111*946379e7Schristos   if (ferror (stream) || fclose (stream))
112*946379e7Schristos     {
113*946379e7Schristos       fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
114*946379e7Schristos       exit (1);
115*946379e7Schristos     }
116*946379e7Schristos }
117*946379e7Schristos 
118*946379e7Schristos /* Perform an exhaustive test of the unicode_character_name function.  */
119*946379e7Schristos static int
test_name_lookup()120*946379e7Schristos test_name_lookup ()
121*946379e7Schristos {
122*946379e7Schristos   int error = 0;
123*946379e7Schristos   unsigned int i;
124*946379e7Schristos   char buf[UNINAME_MAX];
125*946379e7Schristos 
126*946379e7Schristos   for (i = 0; i < 0x11000; i++)
127*946379e7Schristos     {
128*946379e7Schristos       char *result = unicode_character_name (i, buf);
129*946379e7Schristos 
130*946379e7Schristos       if (unicode_names[i] != NULL)
131*946379e7Schristos 	{
132*946379e7Schristos 	  if (result == NULL)
133*946379e7Schristos 	    {
134*946379e7Schristos 	      fprintf (stderr, "\\u%04X name lookup failed!\n", i);
135*946379e7Schristos 	      error = 1;
136*946379e7Schristos 	    }
137*946379e7Schristos 	  else if (strcmp (result, unicode_names[i]) != 0)
138*946379e7Schristos 	    {
139*946379e7Schristos 	      fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
140*946379e7Schristos 			       i, result);
141*946379e7Schristos 	      error = 1;
142*946379e7Schristos 	    }
143*946379e7Schristos 	}
144*946379e7Schristos       else
145*946379e7Schristos 	{
146*946379e7Schristos 	  if (result != NULL)
147*946379e7Schristos 	    {
148*946379e7Schristos 	      fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
149*946379e7Schristos 			       i, result);
150*946379e7Schristos 	      error = 1;
151*946379e7Schristos 	    }
152*946379e7Schristos 	}
153*946379e7Schristos     }
154*946379e7Schristos 
155*946379e7Schristos   for (i = 0x110000; i < 0x1000000; i++)
156*946379e7Schristos     {
157*946379e7Schristos       char *result = unicode_character_name (i, buf);
158*946379e7Schristos 
159*946379e7Schristos       if (result != NULL)
160*946379e7Schristos 	{
161*946379e7Schristos 	  fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
162*946379e7Schristos 			   i, result);
163*946379e7Schristos 	  error = 1;
164*946379e7Schristos 	}
165*946379e7Schristos     }
166*946379e7Schristos 
167*946379e7Schristos   return error;
168*946379e7Schristos }
169*946379e7Schristos 
170*946379e7Schristos /* Perform a test of the unicode_name_character function.  */
171*946379e7Schristos static int
test_inverse_lookup()172*946379e7Schristos test_inverse_lookup ()
173*946379e7Schristos {
174*946379e7Schristos   int error = 0;
175*946379e7Schristos   unsigned int i;
176*946379e7Schristos 
177*946379e7Schristos   /* First, verify all valid character names are recognized.  */
178*946379e7Schristos   for (i = 0; i < 0x110000; i++)
179*946379e7Schristos     if (unicode_names[i] != NULL)
180*946379e7Schristos       {
181*946379e7Schristos 	unsigned int result = unicode_name_character (unicode_names[i]);
182*946379e7Schristos 	if (result != i)
183*946379e7Schristos 	  {
184*946379e7Schristos 	    if (result == UNINAME_INVALID)
185*946379e7Schristos 	      fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
186*946379e7Schristos 		       unicode_names[i]);
187*946379e7Schristos 	    else
188*946379e7Schristos 	      fprintf (stderr,
189*946379e7Schristos 		       "inverse name lookup of \"%s\" returned 0x%04X\n",
190*946379e7Schristos 		       unicode_names[i], result);
191*946379e7Schristos 	    error = 1;
192*946379e7Schristos 	  }
193*946379e7Schristos       }
194*946379e7Schristos 
195*946379e7Schristos   /* Second, generate random but likely names and verify they are not
196*946379e7Schristos      recognized unless really valid.  */
197*946379e7Schristos   for (i = 0; i < 10000; i++)
198*946379e7Schristos     {
199*946379e7Schristos       unsigned int i1, i2;
200*946379e7Schristos       const char *s1;
201*946379e7Schristos       const char *s2;
202*946379e7Schristos       unsigned int l1, l2, j1, j2;
203*946379e7Schristos       char buf[2*UNINAME_MAX];
204*946379e7Schristos       unsigned int result;
205*946379e7Schristos 
206*946379e7Schristos       do i1 = ((rand () % 0x11) << 16)
207*946379e7Schristos 	      + ((rand () & 0xff) << 8)
208*946379e7Schristos 	      + (rand () & 0xff);
209*946379e7Schristos       while (unicode_names[i1] == NULL);
210*946379e7Schristos 
211*946379e7Schristos       do i2 = ((rand () % 0x11) << 16)
212*946379e7Schristos 	      + ((rand () & 0xff) << 8)
213*946379e7Schristos 	      + (rand () & 0xff);
214*946379e7Schristos       while (unicode_names[i2] == NULL);
215*946379e7Schristos 
216*946379e7Schristos       s1 = unicode_names[i1];
217*946379e7Schristos       l1 = strlen (s1);
218*946379e7Schristos       s2 = unicode_names[i2];
219*946379e7Schristos       l2 = strlen (s2);
220*946379e7Schristos 
221*946379e7Schristos       /* Concatenate a starting piece of s1 with an ending piece of s2.  */
222*946379e7Schristos       for (j1 = 1; j1 <= l1; j1++)
223*946379e7Schristos 	if (j1 == l1 || s1[j1] == ' ')
224*946379e7Schristos 	  for (j2 = 0; j2 < l2; j2++)
225*946379e7Schristos 	    if (j2 == 0 || s2[j2-1] == ' ')
226*946379e7Schristos 	      {
227*946379e7Schristos 		memcpy (buf, s1, j1);
228*946379e7Schristos 		buf[j1] = ' ';
229*946379e7Schristos 		memcpy (buf + j1 + 1, s2 + j2, l2 - j2 + 1);
230*946379e7Schristos 
231*946379e7Schristos 		result = unicode_name_character (buf);
232*946379e7Schristos 		if (result != UNINAME_INVALID
233*946379e7Schristos 		    && !(unicode_names[result] != NULL
234*946379e7Schristos 			 && strcmp (unicode_names[result], buf) == 0))
235*946379e7Schristos 		  {
236*946379e7Schristos 		    fprintf (stderr,
237*946379e7Schristos 			     "inverse name lookup of \"%s\" returned 0x%04X\n",
238*946379e7Schristos 			     unicode_names[i], result);
239*946379e7Schristos 		    error = 1;
240*946379e7Schristos 		  }
241*946379e7Schristos 	      }
242*946379e7Schristos     }
243*946379e7Schristos 
244*946379e7Schristos   /* Third, some extreme case that used to loop.  */
245*946379e7Schristos   if (unicode_name_character ("A A") != UNINAME_INVALID)
246*946379e7Schristos     error = 1;
247*946379e7Schristos 
248*946379e7Schristos   return error;
249*946379e7Schristos }
250*946379e7Schristos 
/* Entry point.  The first command-line argument names the data file that is
   passed to fill_names; without it a usage message is printed on stderr and
   the program fails.  Runs test_name_lookup and test_inverse_lookup.
   Returns 0 if both tests succeeded, nonzero otherwise.  */
int
main (int argc, char *argv[])
{
  int error = 0;

  if (argc < 2)
    {
      fprintf (stderr, "usage: %s UNICODEDATA_FILE\n",
               argc > 0 && argv[0] != NULL ? argv[0] : "test-names");
      return EXIT_FAILURE;
    }

  fill_names (argv[1]);

  error |= test_name_lookup ();
  error |= test_inverse_lookup ();

  return error;
}
263