xref: /onnv-gate/usr/src/cmd/man/src/util/nsgmls.src/lib/CharsetInfo.cxx (revision 0:68f95e015346)
1 // Copyright (c) 1994, 1997 James Clark
2 // See the file COPYING for copying permission.
3 #pragma ident	"%Z%%M%	%I%	%E% SMI"
4 
5 #ifdef __GNUG__
6 #pragma implementation
7 #endif
8 #include "splib.h"
9 #include "CharsetInfo.h"
10 #include "ISet.h"
11 #include "constant.h"
12 
13 #ifdef SP_NAMESPACE
14 namespace SP_NAMESPACE {
15 #endif
16 
CharsetInfo(const UnivCharsetDesc & desc)17 CharsetInfo::CharsetInfo(const UnivCharsetDesc &desc)
18 : desc_(desc)
19 {
20   // FIXME remove mappings from desc for characters greater charMax
21   init();
22 }
23 
CharsetInfo()24 CharsetInfo::CharsetInfo()
25 {
26   inverse_.setAll(unsigned(-1));
27 }
28 
set(const UnivCharsetDesc & desc)29 void CharsetInfo::set(const UnivCharsetDesc &desc)
30 {
31   desc_ = desc;
32   init();
33 }
34 
init()35 void CharsetInfo::init()
36 {
37   inverse_.setAll(Unsigned32(-1));
38 
39   UnivCharsetDescIter iter(desc_);
40 
41   WideChar descMin, descMax;
42   UnivChar univMin;
43   while (iter.next(descMin, descMax, univMin)) {
44     if (univMin <= charMax) {
45       Char univMax;
46       if (charMax - univMin < descMax - descMin)
47 	univMax = charMax;
48       else
49 	univMax = univMin + (descMax - descMin);
50       Unsigned32 diff
51 	= ((descMin - univMin) & ((Unsigned32(1) << 31) - 1));
52       for (;;) {
53 	Char max;
54 	Unsigned32 n = inverse_.getRange(univMin, max);
55 	if (max > univMax)
56 	  max = univMax;
57 	if (n == Unsigned32(-1))
58 	  inverse_.setRange(univMin, max, diff);
59 	else if (n != Unsigned32(-2))
60 	  inverse_.setRange(univMin, max, Unsigned32(-2));
61 	if (max == univMax)
62 	  break;
63 	univMin = max + 1;
64       }
65     }
66   }
67   // These are the characters that the ANSI C
68   // standard guarantees will be in the basic execution
69   // character set.
70   static char execChars[] =
71     "\t\n\r "
72     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
73     "abcdefghijklmnopqrstuvwxyz"
74     "0123456789"
75     "!\"#%&'()*+,-./:"
76     ";<=>?[\\]^_{|}~";
77   // These are the corresponding ISO 646 codes.
78   static char univCodes[] = {
79     9, 10, 13, 32,
80     65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
81     78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
82     97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
83     110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
84     48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
85     33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 58,
86     59, 60, 61, 62, 63, 91, 92, 93, 94, 95, 123, 124, 125, 126,
87   };
88   for (size_t i = 0; execChars[i] != '\0'; i++) {
89     WideChar c;
90     ISet<WideChar> set;
91     if (univToDesc(univCodes[i], c, set) > 0 && c <= Char(-1))
92       execToDesc_[(unsigned char)execChars[i]] = Char(c);
93   }
94 }
95 
getDescSet(ISet<Char> & set) const96 void CharsetInfo::getDescSet(ISet<Char> &set) const
97 {
98   UnivCharsetDescIter iter(desc_);
99   WideChar descMin, descMax;
100   UnivChar univMin;
101   while (iter.next(descMin, descMax, univMin)) {
102     if (descMin > charMax)
103       break;
104     if (descMax > charMax)
105       descMax = charMax;
106     set.addRange(Char(descMin), Char(descMax));
107   }
108 }
109 
digitWeight(Char c) const110 int CharsetInfo::digitWeight(Char c) const
111 {
112   for (int i = 0; i < 10; i++)
113     if (c == execToDesc('0' + i))
114       return i;
115   return -1;
116 }
117 
hexDigitWeight(Char c) const118 int CharsetInfo::hexDigitWeight(Char c) const
119 {
120   for (int i = 0; i < 10; i++)
121     if (c == execToDesc('0' + i))
122       return i;
123   for (int i = 0; i < 6; i++)
124     if (c == execToDesc('a' + i) || c == execToDesc('A' + i))
125       return i + 10;
126   return -1;
127 }
128 
execToDesc(const char * s) const129 StringC CharsetInfo::execToDesc(const char *s) const
130 {
131   StringC result;
132   while (*s != '\0')
133     result += execToDesc(*s++);
134   return result;
135 }
136 
137 #ifdef SP_NAMESPACE
138 }
139 #endif
140