/* Association between Unicode characters and their names.
   Copyright (C) 2000-2002, 2005-2006 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
17946379e7Schristos
18946379e7Schristos #ifdef HAVE_CONFIG_H
19946379e7Schristos # include <config.h>
20946379e7Schristos #endif
21946379e7Schristos
22946379e7Schristos /* Specification. */
23946379e7Schristos #include "uniname.h"
24946379e7Schristos
25*95b39c65Schristos #include <sys/types.h>
26946379e7Schristos #include <assert.h>
27946379e7Schristos #include <stdbool.h>
28946379e7Schristos #include <stdio.h>
29946379e7Schristos #include <string.h>
30946379e7Schristos
31946379e7Schristos #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
32946379e7Schristos
33946379e7Schristos
34946379e7Schristos /* Table of Unicode character names, derived from UnicodeData.txt. */
35946379e7Schristos #include "uninames.h"
/* It contains:
     static const char unicode_name_words[34594] = ...;
     #define UNICODE_CHARNAME_NUM_WORDS 5906
     static const struct { uint16_t extra_offset; uint16_t ind_offset; } unicode_name_by_length[26] = ...;
     #define UNICODE_CHARNAME_WORD_HANGUL 3624
     #define UNICODE_CHARNAME_WORD_SYLLABLE 4654
     #define UNICODE_CHARNAME_WORD_CJK 401
     #define UNICODE_CHARNAME_WORD_COMPATIBILITY 5755
     static const uint16_t unicode_names[62620] = ...;
     static const struct { uint16_t code; uint16_t name; } unicode_name_to_code[15257] = ...;
     static const struct { uint16_t code; uint16_t name; } unicode_code_to_name[15257] = ...;
     #define UNICODE_CHARNAME_MAX_LENGTH 83
     #define UNICODE_CHARNAME_MAX_WORDS 13
*/
50946379e7Schristos
51946379e7Schristos /* Returns the word with a given index. */
52946379e7Schristos static const char *
unicode_name_word(unsigned int index,unsigned int * lengthp)53946379e7Schristos unicode_name_word (unsigned int index, unsigned int *lengthp)
54946379e7Schristos {
55946379e7Schristos unsigned int i1;
56946379e7Schristos unsigned int i2;
57946379e7Schristos unsigned int i;
58946379e7Schristos
59946379e7Schristos assert (index < UNICODE_CHARNAME_NUM_WORDS);
60946379e7Schristos
61946379e7Schristos /* Binary search for i with
62946379e7Schristos unicode_name_by_length[i].ind_offset <= index
63946379e7Schristos and
64946379e7Schristos index < unicode_name_by_length[i+1].ind_offset
65946379e7Schristos */
66946379e7Schristos
67946379e7Schristos i1 = 0;
68946379e7Schristos i2 = SIZEOF (unicode_name_by_length) - 1;
69946379e7Schristos while (i2 - i1 > 1)
70946379e7Schristos {
71946379e7Schristos unsigned int i = (i1 + i2) >> 1;
72946379e7Schristos if (unicode_name_by_length[i].ind_offset <= index)
73946379e7Schristos i1 = i;
74946379e7Schristos else
75946379e7Schristos i2 = i;
76946379e7Schristos }
77946379e7Schristos i = i1;
78946379e7Schristos assert (unicode_name_by_length[i].ind_offset <= index
79946379e7Schristos && index < unicode_name_by_length[i+1].ind_offset);
80946379e7Schristos *lengthp = i;
81946379e7Schristos return &unicode_name_words[unicode_name_by_length[i].extra_offset
82946379e7Schristos + (index-unicode_name_by_length[i].ind_offset)*i];
83946379e7Schristos }
84946379e7Schristos
85946379e7Schristos /* Looks up the index of a word. */
86946379e7Schristos static int
unicode_name_word_lookup(const char * word,unsigned int length)87946379e7Schristos unicode_name_word_lookup (const char *word, unsigned int length)
88946379e7Schristos {
89946379e7Schristos if (length > 0 && length < SIZEOF (unicode_name_by_length) - 1)
90946379e7Schristos {
91946379e7Schristos /* Binary search among the words of given length. */
92946379e7Schristos unsigned int extra_offset = unicode_name_by_length[length].extra_offset;
93946379e7Schristos unsigned int i0 = unicode_name_by_length[length].ind_offset;
94946379e7Schristos unsigned int i1 = i0;
95946379e7Schristos unsigned int i2 = unicode_name_by_length[length+1].ind_offset;
96946379e7Schristos while (i2 - i1 > 0)
97946379e7Schristos {
98946379e7Schristos unsigned int i = (i1 + i2) >> 1;
99946379e7Schristos const char *p = &unicode_name_words[extra_offset + (i-i0)*length];
100946379e7Schristos const char *w = word;
101946379e7Schristos unsigned int n = length;
102946379e7Schristos for (;;)
103946379e7Schristos {
104946379e7Schristos if (*p < *w)
105946379e7Schristos {
106946379e7Schristos if (i1 == i)
107946379e7Schristos return -1;
108946379e7Schristos /* Note here: i1 < i < i2. */
109946379e7Schristos i1 = i;
110946379e7Schristos break;
111946379e7Schristos }
112946379e7Schristos if (*p > *w)
113946379e7Schristos {
114946379e7Schristos /* Note here: i1 <= i < i2. */
115946379e7Schristos i2 = i;
116946379e7Schristos break;
117946379e7Schristos }
118946379e7Schristos p++; w++; n--;
119946379e7Schristos if (n == 0)
120946379e7Schristos return i;
121946379e7Schristos }
122946379e7Schristos }
123946379e7Schristos }
124946379e7Schristos return -1;
125946379e7Schristos }
126946379e7Schristos
/* Auxiliary tables for Hangul syllable names, see the Unicode 3.0 book,
   sections 3.11 and 4.4.
   A syllable name is "HANGUL SYLLABLE " followed by the short names of its
   initial consonant, its medial vowel and its (possibly empty) final
   consonant.  Each table is indexed by the position of the jamo within its
   class; the strings must agree with the short names in the Unicode
   Character Database file Jamo.txt.  */

/* Short names of the 19 initial consonants.  The empty entry is the silent
   initial IEUNG.  */
static const char jamo_initial_short_name[19][3] =
{
  "G", "GG", "N", "D", "DD", "R", "M", "B", "BB", "S", "SS", "", "J", "JJ",
  "C", "K", "T", "P", "H"
};

/* Short names of the 21 medial vowels.  */
static const char jamo_medial_short_name[21][4] =
{
  "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", "WA", "WAE", "OE", "YO",
  "U", "WEO", "WE", "WI", "YU", "EU", "YI", "I"
};

/* Short names of the 28 final consonants.  Entry 0 is empty: the syllable
   has no final consonant.  Entry 5 (NIEUN-CIEUC, U+11AC) is "NJ", not "NI".  */
static const char jamo_final_short_name[28][3] =
{
  "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM", "LB", "LS", "LT",
  "LP", "LH", "M", "B", "BS", "S", "SS", "NG", "J", "C", "K", "T", "P", "H"
};
144946379e7Schristos
145946379e7Schristos /* Looks up the name of a Unicode character, in uppercase ASCII.
146946379e7Schristos Returns the filled buf, or NULL if the character does not have a name. */
147946379e7Schristos char *
unicode_character_name(unsigned int c,char * buf)148946379e7Schristos unicode_character_name (unsigned int c, char *buf)
149946379e7Schristos {
150946379e7Schristos if (c >= 0xAC00 && c <= 0xD7A3)
151946379e7Schristos {
152946379e7Schristos /* Special case for Hangul syllables. Keeps the tables small. */
153946379e7Schristos char *ptr;
154946379e7Schristos unsigned int tmp;
155946379e7Schristos unsigned int index1;
156946379e7Schristos unsigned int index2;
157946379e7Schristos unsigned int index3;
158946379e7Schristos const char *q;
159946379e7Schristos
160946379e7Schristos /* buf needs to have at least 16 + 7 bytes here. */
161946379e7Schristos memcpy (buf, "HANGUL SYLLABLE ", 16);
162946379e7Schristos ptr = buf + 16;
163946379e7Schristos
164946379e7Schristos tmp = c - 0xAC00;
165946379e7Schristos index3 = tmp % 28; tmp = tmp / 28;
166946379e7Schristos index2 = tmp % 21; tmp = tmp / 21;
167946379e7Schristos index1 = tmp;
168946379e7Schristos
169946379e7Schristos q = jamo_initial_short_name[index1];
170946379e7Schristos while (*q != '\0')
171946379e7Schristos *ptr++ = *q++;
172946379e7Schristos q = jamo_medial_short_name[index2];
173946379e7Schristos while (*q != '\0')
174946379e7Schristos *ptr++ = *q++;
175946379e7Schristos q = jamo_final_short_name[index3];
176946379e7Schristos while (*q != '\0')
177946379e7Schristos *ptr++ = *q++;
178946379e7Schristos *ptr = '\0';
179946379e7Schristos return buf;
180946379e7Schristos }
181946379e7Schristos else if ((c >= 0xF900 && c <= 0xFA2D) || (c >= 0xFA30 && c <= 0xFA6A)
182946379e7Schristos || (c >= 0xFA70 && c <= 0xFAD9) || (c >= 0x2F800 && c <= 0x2FA1D))
183946379e7Schristos {
184946379e7Schristos /* Special case for CJK compatibility ideographs. Keeps the tables
185946379e7Schristos small. */
186946379e7Schristos char *ptr;
187946379e7Schristos int i;
188946379e7Schristos
189946379e7Schristos /* buf needs to have at least 28 + 5 bytes here. */
190946379e7Schristos memcpy (buf, "CJK COMPATIBILITY IDEOGRAPH-", 28);
191946379e7Schristos ptr = buf + 28;
192946379e7Schristos
193946379e7Schristos for (i = (c < 0x10000 ? 12 : 16); i >= 0; i -= 4)
194946379e7Schristos {
195946379e7Schristos unsigned int x = (c >> i) & 0xf;
196946379e7Schristos *ptr++ = (x < 10 ? '0' : 'A' - 10) + x;
197946379e7Schristos }
198946379e7Schristos *ptr = '\0';
199946379e7Schristos return buf;
200946379e7Schristos }
201946379e7Schristos else
202946379e7Schristos {
203946379e7Schristos const uint16_t *words;
204946379e7Schristos
205946379e7Schristos /* Transform the code so that it fits in 16 bits. */
206946379e7Schristos switch (c >> 12)
207946379e7Schristos {
208946379e7Schristos case 0x00: case 0x01: case 0x02: case 0x03: case 0x04:
209946379e7Schristos break;
210946379e7Schristos case 0x0A:
211946379e7Schristos c -= 0x05000;
212946379e7Schristos break;
213946379e7Schristos case 0x0F:
214946379e7Schristos c -= 0x09000;
215946379e7Schristos break;
216946379e7Schristos case 0x10:
217946379e7Schristos c -= 0x09000;
218946379e7Schristos break;
219946379e7Schristos case 0x1D:
220946379e7Schristos c -= 0x15000;
221946379e7Schristos break;
222946379e7Schristos case 0x2F:
223946379e7Schristos c -= 0x26000;
224946379e7Schristos break;
225946379e7Schristos case 0xE0:
226946379e7Schristos c -= 0xD6000;
227946379e7Schristos break;
228946379e7Schristos default:
229946379e7Schristos return NULL;
230946379e7Schristos }
231946379e7Schristos
232946379e7Schristos {
233946379e7Schristos /* Binary search in unicode_code_to_name. */
234946379e7Schristos unsigned int i1 = 0;
235946379e7Schristos unsigned int i2 = SIZEOF (unicode_code_to_name);
236946379e7Schristos for (;;)
237946379e7Schristos {
238946379e7Schristos unsigned int i = (i1 + i2) >> 1;
239946379e7Schristos if (unicode_code_to_name[i].code == c)
240946379e7Schristos {
241946379e7Schristos words = &unicode_names[unicode_code_to_name[i].name];
242946379e7Schristos break;
243946379e7Schristos }
244946379e7Schristos else if (unicode_code_to_name[i].code < c)
245946379e7Schristos {
246946379e7Schristos if (i1 == i)
247946379e7Schristos {
248946379e7Schristos words = NULL;
249946379e7Schristos break;
250946379e7Schristos }
251946379e7Schristos /* Note here: i1 < i < i2. */
252946379e7Schristos i1 = i;
253946379e7Schristos }
254946379e7Schristos else if (unicode_code_to_name[i].code > c)
255946379e7Schristos {
256946379e7Schristos if (i2 == i)
257946379e7Schristos {
258946379e7Schristos words = NULL;
259946379e7Schristos break;
260946379e7Schristos }
261946379e7Schristos /* Note here: i1 <= i < i2. */
262946379e7Schristos i2 = i;
263946379e7Schristos }
264946379e7Schristos }
265946379e7Schristos }
266946379e7Schristos if (words != NULL)
267946379e7Schristos {
268946379e7Schristos /* Found it in unicode_code_to_name. Now concatenate the words. */
269946379e7Schristos /* buf needs to have at least UNICODE_CHARNAME_MAX_LENGTH bytes. */
270946379e7Schristos char *ptr = buf;
271946379e7Schristos for (;;)
272946379e7Schristos {
273946379e7Schristos unsigned int wordlen;
274946379e7Schristos const char *word = unicode_name_word (*words>>1, &wordlen);
275946379e7Schristos do
276946379e7Schristos *ptr++ = *word++;
277946379e7Schristos while (--wordlen > 0);
278946379e7Schristos if ((*words & 1) == 0)
279946379e7Schristos break;
280946379e7Schristos *ptr++ = ' ';
281946379e7Schristos words++;
282946379e7Schristos }
283946379e7Schristos *ptr = '\0';
284946379e7Schristos return buf;
285946379e7Schristos }
286946379e7Schristos return NULL;
287946379e7Schristos }
288946379e7Schristos }
289946379e7Schristos
290946379e7Schristos /* Looks up the Unicode character with a given name, in upper- or lowercase
291946379e7Schristos ASCII. Returns the character if found, or UNINAME_INVALID if not found. */
292946379e7Schristos unsigned int
unicode_name_character(const char * name)293946379e7Schristos unicode_name_character (const char *name)
294946379e7Schristos {
295946379e7Schristos unsigned int len = strlen (name);
296946379e7Schristos if (len > 1 && len <= UNICODE_CHARNAME_MAX_LENGTH)
297946379e7Schristos {
298946379e7Schristos /* Test for "word1 word2 ..." syntax. */
299946379e7Schristos char buf[UNICODE_CHARNAME_MAX_LENGTH];
300946379e7Schristos char *ptr = buf;
301946379e7Schristos for (;;)
302946379e7Schristos {
303946379e7Schristos char c = *name++;
304946379e7Schristos if (!(c >= ' ' && c <= '~'))
305946379e7Schristos break;
306946379e7Schristos *ptr++ = (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c);
307946379e7Schristos if (--len == 0)
308946379e7Schristos goto filled_buf;
309946379e7Schristos }
310946379e7Schristos if (false)
311946379e7Schristos filled_buf:
312946379e7Schristos {
313946379e7Schristos /* Convert the constituents to uint16_t words. */
314946379e7Schristos uint16_t words[UNICODE_CHARNAME_MAX_WORDS];
315946379e7Schristos uint16_t *wordptr = words;
316946379e7Schristos {
317946379e7Schristos const char *p1 = buf;
318946379e7Schristos for (;;)
319946379e7Schristos {
320946379e7Schristos {
321946379e7Schristos int word;
322946379e7Schristos const char *p2 = p1;
323946379e7Schristos while (p2 < ptr && *p2 != ' ')
324946379e7Schristos p2++;
325946379e7Schristos word = unicode_name_word_lookup (p1, p2 - p1);
326946379e7Schristos if (word < 0)
327946379e7Schristos break;
328946379e7Schristos if (wordptr == &words[UNICODE_CHARNAME_MAX_WORDS])
329946379e7Schristos break;
330946379e7Schristos *wordptr++ = word;
331946379e7Schristos if (p2 == ptr)
332946379e7Schristos goto filled_words;
333946379e7Schristos p1 = p2 + 1;
334946379e7Schristos }
335946379e7Schristos /* Special case for Hangul syllables. Keeps the tables small. */
336946379e7Schristos if (wordptr == &words[2]
337946379e7Schristos && words[0] == UNICODE_CHARNAME_WORD_HANGUL
338946379e7Schristos && words[1] == UNICODE_CHARNAME_WORD_SYLLABLE)
339946379e7Schristos {
340946379e7Schristos /* Split the last word [p1..ptr) into three parts:
341946379e7Schristos 1) [BCDGHJKMNPRST]
342946379e7Schristos 2) [AEIOUWY]
343946379e7Schristos 3) [BCDGHIJKLMNPST]
344946379e7Schristos */
345946379e7Schristos const char *p2;
346946379e7Schristos const char *p3;
347946379e7Schristos const char *p4;
348946379e7Schristos
349946379e7Schristos p2 = p1;
350946379e7Schristos while (p2 < ptr
351946379e7Schristos && (*p2 == 'B' || *p2 == 'C' || *p2 == 'D'
352946379e7Schristos || *p2 == 'G' || *p2 == 'H' || *p2 == 'J'
353946379e7Schristos || *p2 == 'K' || *p2 == 'M' || *p2 == 'N'
354946379e7Schristos || *p2 == 'P' || *p2 == 'R' || *p2 == 'S'
355946379e7Schristos || *p2 == 'T'))
356946379e7Schristos p2++;
357946379e7Schristos p3 = p2;
358946379e7Schristos while (p3 < ptr
359946379e7Schristos && (*p3 == 'A' || *p3 == 'E' || *p3 == 'I'
360946379e7Schristos || *p3 == 'O' || *p3 == 'U' || *p3 == 'W'
361946379e7Schristos || *p3 == 'Y'))
362946379e7Schristos p3++;
363946379e7Schristos p4 = p3;
364946379e7Schristos while (p4 < ptr
365946379e7Schristos && (*p4 == 'B' || *p4 == 'C' || *p4 == 'D'
366946379e7Schristos || *p4 == 'G' || *p4 == 'H' || *p4 == 'I'
367946379e7Schristos || *p4 == 'J' || *p4 == 'K' || *p4 == 'L'
368946379e7Schristos || *p4 == 'M' || *p4 == 'N' || *p4 == 'P'
369946379e7Schristos || *p4 == 'S' || *p4 == 'T'))
370946379e7Schristos p4++;
371946379e7Schristos if (p4 == ptr)
372946379e7Schristos {
373946379e7Schristos unsigned int n1 = p2 - p1;
374946379e7Schristos unsigned int n2 = p3 - p2;
375946379e7Schristos unsigned int n3 = p4 - p3;
376946379e7Schristos
377946379e7Schristos if (n1 <= 2 && (n2 >= 1 && n2 <= 3) && n3 <= 2)
378946379e7Schristos {
379946379e7Schristos unsigned int index1;
380946379e7Schristos
381946379e7Schristos for (index1 = 0; index1 < 19; index1++)
382946379e7Schristos if (memcmp(jamo_initial_short_name[index1], p1, n1) == 0
383946379e7Schristos && jamo_initial_short_name[index1][n1] == '\0')
384946379e7Schristos {
385946379e7Schristos unsigned int index2;
386946379e7Schristos
387946379e7Schristos for (index2 = 0; index2 < 21; index2++)
388946379e7Schristos if (memcmp(jamo_medial_short_name[index2], p2, n2) == 0
389946379e7Schristos && jamo_medial_short_name[index2][n2] == '\0')
390946379e7Schristos {
391946379e7Schristos unsigned int index3;
392946379e7Schristos
393946379e7Schristos for (index3 = 0; index3 < 28; index3++)
394946379e7Schristos if (memcmp(jamo_final_short_name[index3], p3, n3) == 0
395946379e7Schristos && jamo_final_short_name[index3][n3] == '\0')
396946379e7Schristos {
397946379e7Schristos return 0xAC00 + (index1 * 21 + index2) * 28 + index3;
398946379e7Schristos }
399946379e7Schristos break;
400946379e7Schristos }
401946379e7Schristos break;
402946379e7Schristos }
403946379e7Schristos }
404946379e7Schristos }
405946379e7Schristos }
406946379e7Schristos /* Special case for CJK compatibility ideographs. Keeps the
407946379e7Schristos tables small. */
408946379e7Schristos if (wordptr == &words[2]
409946379e7Schristos && words[0] == UNICODE_CHARNAME_WORD_CJK
410946379e7Schristos && words[1] == UNICODE_CHARNAME_WORD_COMPATIBILITY
411946379e7Schristos && p1 + 14 <= ptr
412946379e7Schristos && p1 + 15 >= ptr
413946379e7Schristos && memcmp (p1, "IDEOGRAPH-", 10) == 0)
414946379e7Schristos {
415946379e7Schristos const char *p2 = p1 + 10;
416946379e7Schristos
417946379e7Schristos if (*p2 != '0')
418946379e7Schristos {
419946379e7Schristos unsigned int c = 0;
420946379e7Schristos
421946379e7Schristos for (;;)
422946379e7Schristos {
423946379e7Schristos if (*p2 >= '0' && *p2 <= '9')
424946379e7Schristos c += (*p2 - '0');
425946379e7Schristos else if (*p2 >= 'A' && *p2 <= 'F')
426946379e7Schristos c += (*p2 - 'A' + 10);
427946379e7Schristos else
428946379e7Schristos break;
429946379e7Schristos p2++;
430946379e7Schristos if (p2 == ptr)
431946379e7Schristos {
432946379e7Schristos if ((c >= 0xF900 && c <= 0xFA2D)
433946379e7Schristos || (c >= 0xFA30 && c <= 0xFA6A)
434946379e7Schristos || (c >= 0xFA70 && c <= 0xFAD9)
435946379e7Schristos || (c >= 0x2F800 && c <= 0x2FA1D))
436946379e7Schristos return c;
437946379e7Schristos else
438946379e7Schristos break;
439946379e7Schristos }
440946379e7Schristos c = c << 4;
441946379e7Schristos }
442946379e7Schristos }
443946379e7Schristos }
444946379e7Schristos }
445946379e7Schristos }
446946379e7Schristos if (false)
447946379e7Schristos filled_words:
448946379e7Schristos {
449946379e7Schristos /* Multiply by 2, to simplify later comparisons. */
450946379e7Schristos unsigned int words_length = wordptr - words;
451946379e7Schristos {
452946379e7Schristos int i = words_length - 1;
453946379e7Schristos words[i] = 2 * words[i];
454946379e7Schristos for (; --i >= 0; )
455946379e7Schristos words[i] = 2 * words[i] + 1;
456946379e7Schristos }
457946379e7Schristos /* Binary search in unicode_name_to_code. */
458946379e7Schristos {
459946379e7Schristos unsigned int i1 = 0;
460946379e7Schristos unsigned int i2 = SIZEOF (unicode_name_to_code);
461946379e7Schristos for (;;)
462946379e7Schristos {
463946379e7Schristos unsigned int i = (i1 + i2) >> 1;
464946379e7Schristos const uint16_t *w = words;
465946379e7Schristos const uint16_t *p = &unicode_names[unicode_name_to_code[i].name];
466946379e7Schristos unsigned int n = words_length;
467946379e7Schristos for (;;)
468946379e7Schristos {
469946379e7Schristos if (*p < *w)
470946379e7Schristos {
471946379e7Schristos if (i1 == i)
472946379e7Schristos goto name_not_found;
473946379e7Schristos /* Note here: i1 < i < i2. */
474946379e7Schristos i1 = i;
475946379e7Schristos break;
476946379e7Schristos }
477946379e7Schristos else if (*p > *w)
478946379e7Schristos {
479946379e7Schristos if (i2 == i)
480946379e7Schristos goto name_not_found;
481946379e7Schristos /* Note here: i1 <= i < i2. */
482946379e7Schristos i2 = i;
483946379e7Schristos break;
484946379e7Schristos }
485946379e7Schristos p++; w++; n--;
486946379e7Schristos if (n == 0)
487946379e7Schristos {
488946379e7Schristos unsigned int c = unicode_name_to_code[i].code;
489946379e7Schristos
490946379e7Schristos /* Undo the transformation to 16-bit space. */
491946379e7Schristos static const unsigned int offset[11] =
492946379e7Schristos {
493946379e7Schristos 0x00000, 0x00000, 0x00000, 0x00000, 0x00000,
494946379e7Schristos 0x05000, 0x09000, 0x09000, 0x15000, 0x26000,
495946379e7Schristos 0xD6000
496946379e7Schristos };
497946379e7Schristos return c + offset[c >> 12];
498946379e7Schristos }
499946379e7Schristos }
500946379e7Schristos }
501946379e7Schristos }
502946379e7Schristos name_not_found: ;
503946379e7Schristos }
504946379e7Schristos }
505946379e7Schristos }
506946379e7Schristos return UNINAME_INVALID;
507946379e7Schristos }
508