1 /* $NetBSD: unicode.cpp,v 1.1.1.1 2016/01/13 18:41:48 christos Exp $ */ 2 3 // -*- C++ -*- 4 /* Copyright (C) 2002 5 Free Software Foundation, Inc. 6 Written by Werner Lemberg <wl@gnu.org> 7 8 This file is part of groff. 9 10 groff is free software; you can redistribute it and/or modify it under 11 the terms of the GNU General Public License as published by the Free 12 Software Foundation; either version 2, or (at your option) any later 13 version. 14 15 groff is distributed in the hope that it will be useful, but WITHOUT ANY 16 WARRANTY; without even the implied warranty of MERCHANTABILITY or 17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 18 for more details. 19 20 You should have received a copy of the GNU General Public License along 21 with groff; see the file COPYING. If not, write to the Free Software 22 Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */ 23 24 #include "lib.h" 25 #include "cset.h" 26 #include "stringclass.h" 27 28 #include "unicode.h" 29 30 const char *check_unicode_name(const char *u) 31 { 32 if (*u != 'u') 33 return 0; 34 const char *p = ++u; 35 for (;;) { 36 int val = 0; 37 const char *start = p; 38 for (;;) { 39 // only uppercase hex digits allowed 40 if (!csxdigit(*p)) 41 return 0; 42 if (csdigit(*p)) 43 val = val*0x10 + (*p-'0'); 44 else if (csupper(*p)) 45 val = val*0x10 + (*p-'A'+10); 46 else 47 return 0; 48 // biggest Unicode value is U+10FFFF 49 if (val > 0x10FFFF) 50 return 0; 51 p++; 52 if (*p == '\0' || *p == '_') 53 break; 54 } 55 // surrogates not allowed 56 if ((val >= 0xD800 && val <= 0xDBFF) || (val >= 0xDC00 && val <= 0xDFFF)) 57 return 0; 58 if (val > 0xFFFF) { 59 if (*start == '0') // no leading zeros allowed if > 0xFFFF 60 return 0; 61 } 62 else if (p - start != 4) // otherwise, check for exactly 4 hex digits 63 return 0; 64 if (*p == '\0') 65 break; 66 p++; 67 } 68 return u; 69 } 70