1*89a07cf8Schristos /* $NetBSD: unicode.cpp,v 1.1.1.1 2016/01/13 18:41:48 christos Exp $ */
2*89a07cf8Schristos
3*89a07cf8Schristos // -*- C++ -*-
4*89a07cf8Schristos /* Copyright (C) 2002
5*89a07cf8Schristos Free Software Foundation, Inc.
6*89a07cf8Schristos Written by Werner Lemberg <wl@gnu.org>
7*89a07cf8Schristos
8*89a07cf8Schristos This file is part of groff.
9*89a07cf8Schristos
10*89a07cf8Schristos groff is free software; you can redistribute it and/or modify it under
11*89a07cf8Schristos the terms of the GNU General Public License as published by the Free
12*89a07cf8Schristos Software Foundation; either version 2, or (at your option) any later
13*89a07cf8Schristos version.
14*89a07cf8Schristos
15*89a07cf8Schristos groff is distributed in the hope that it will be useful, but WITHOUT ANY
16*89a07cf8Schristos WARRANTY; without even the implied warranty of MERCHANTABILITY or
17*89a07cf8Schristos FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18*89a07cf8Schristos for more details.
19*89a07cf8Schristos
20*89a07cf8Schristos You should have received a copy of the GNU General Public License along
21*89a07cf8Schristos with groff; see the file COPYING. If not, write to the Free Software
22*89a07cf8Schristos Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
23*89a07cf8Schristos
24*89a07cf8Schristos #include "lib.h"
25*89a07cf8Schristos #include "cset.h"
26*89a07cf8Schristos #include "stringclass.h"
27*89a07cf8Schristos
28*89a07cf8Schristos #include "unicode.h"
29*89a07cf8Schristos
check_unicode_name(const char * u)30*89a07cf8Schristos const char *check_unicode_name(const char *u)
31*89a07cf8Schristos {
32*89a07cf8Schristos if (*u != 'u')
33*89a07cf8Schristos return 0;
34*89a07cf8Schristos const char *p = ++u;
35*89a07cf8Schristos for (;;) {
36*89a07cf8Schristos int val = 0;
37*89a07cf8Schristos const char *start = p;
38*89a07cf8Schristos for (;;) {
39*89a07cf8Schristos // only uppercase hex digits allowed
40*89a07cf8Schristos if (!csxdigit(*p))
41*89a07cf8Schristos return 0;
42*89a07cf8Schristos if (csdigit(*p))
43*89a07cf8Schristos val = val*0x10 + (*p-'0');
44*89a07cf8Schristos else if (csupper(*p))
45*89a07cf8Schristos val = val*0x10 + (*p-'A'+10);
46*89a07cf8Schristos else
47*89a07cf8Schristos return 0;
48*89a07cf8Schristos // biggest Unicode value is U+10FFFF
49*89a07cf8Schristos if (val > 0x10FFFF)
50*89a07cf8Schristos return 0;
51*89a07cf8Schristos p++;
52*89a07cf8Schristos if (*p == '\0' || *p == '_')
53*89a07cf8Schristos break;
54*89a07cf8Schristos }
55*89a07cf8Schristos // surrogates not allowed
56*89a07cf8Schristos if ((val >= 0xD800 && val <= 0xDBFF) || (val >= 0xDC00 && val <= 0xDFFF))
57*89a07cf8Schristos return 0;
58*89a07cf8Schristos if (val > 0xFFFF) {
59*89a07cf8Schristos if (*start == '0') // no leading zeros allowed if > 0xFFFF
60*89a07cf8Schristos return 0;
61*89a07cf8Schristos }
62*89a07cf8Schristos else if (p - start != 4) // otherwise, check for exactly 4 hex digits
63*89a07cf8Schristos return 0;
64*89a07cf8Schristos if (*p == '\0')
65*89a07cf8Schristos break;
66*89a07cf8Schristos p++;
67*89a07cf8Schristos }
68*89a07cf8Schristos return u;
69*89a07cf8Schristos }
70