xref: /netbsd-src/external/gpl2/groff/dist/src/libs/libgroff/unicode.cpp (revision 89a07cf815a29524268025a1139fac4c5190f765)
1 /*	$NetBSD: unicode.cpp,v 1.1.1.1 2016/01/13 18:41:48 christos Exp $	*/
2 
3 // -*- C++ -*-
4 /* Copyright (C) 2002
5    Free Software Foundation, Inc.
6      Written by Werner Lemberg <wl@gnu.org>
7 
8 This file is part of groff.
9 
10 groff is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2, or (at your option) any later
13 version.
14 
15 groff is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
18 for more details.
19 
20 You should have received a copy of the GNU General Public License along
21 with groff; see the file COPYING.  If not, write to the Free Software
22 Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
23 
24 #include "lib.h"
25 #include "cset.h"
26 #include "stringclass.h"
27 
28 #include "unicode.h"
29 
check_unicode_name(const char * u)30 const char *check_unicode_name(const char *u)
31 {
32   if (*u != 'u')
33     return 0;
34   const char *p = ++u;
35   for (;;) {
36     int val = 0;
37     const char *start = p;
38     for (;;) {
39       // only uppercase hex digits allowed
40       if (!csxdigit(*p))
41 	return 0;
42       if (csdigit(*p))
43 	val = val*0x10 + (*p-'0');
44       else if (csupper(*p))
45 	val = val*0x10 + (*p-'A'+10);
46       else
47 	return 0;
48       // biggest Unicode value is U+10FFFF
49       if (val > 0x10FFFF)
50 	return 0;
51       p++;
52       if (*p == '\0' || *p == '_')
53 	break;
54     }
55     // surrogates not allowed
56     if ((val >= 0xD800 && val <= 0xDBFF) || (val >= 0xDC00 && val <= 0xDFFF))
57       return 0;
58     if (val > 0xFFFF) {
59       if (*start == '0')	// no leading zeros allowed if > 0xFFFF
60 	return 0;
61     }
62     else if (p - start != 4)	// otherwise, check for exactly 4 hex digits
63       return 0;
64     if (*p == '\0')
65       break;
66     p++;
67   }
68   return u;
69 }
70