xref: /onnv-gate/usr/src/cmd/man/src/util/nsgmls.src/lib/CharsetInfo.cxx (revision 0:68f95e015346)
1*0Sstevel@tonic-gate // Copyright (c) 1994, 1997 James Clark
2*0Sstevel@tonic-gate // See the file COPYING for copying permission.
3*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
4*0Sstevel@tonic-gate 
5*0Sstevel@tonic-gate #ifdef __GNUG__
6*0Sstevel@tonic-gate #pragma implementation
7*0Sstevel@tonic-gate #endif
8*0Sstevel@tonic-gate #include "splib.h"
9*0Sstevel@tonic-gate #include "CharsetInfo.h"
10*0Sstevel@tonic-gate #include "ISet.h"
11*0Sstevel@tonic-gate #include "constant.h"
12*0Sstevel@tonic-gate 
13*0Sstevel@tonic-gate #ifdef SP_NAMESPACE
14*0Sstevel@tonic-gate namespace SP_NAMESPACE {
15*0Sstevel@tonic-gate #endif
16*0Sstevel@tonic-gate 
CharsetInfo(const UnivCharsetDesc & desc)17*0Sstevel@tonic-gate CharsetInfo::CharsetInfo(const UnivCharsetDesc &desc)
18*0Sstevel@tonic-gate : desc_(desc)
19*0Sstevel@tonic-gate {
20*0Sstevel@tonic-gate   // FIXME remove mappings from desc for characters greater charMax
21*0Sstevel@tonic-gate   init();
22*0Sstevel@tonic-gate }
23*0Sstevel@tonic-gate 
CharsetInfo()24*0Sstevel@tonic-gate CharsetInfo::CharsetInfo()
25*0Sstevel@tonic-gate {
26*0Sstevel@tonic-gate   inverse_.setAll(unsigned(-1));
27*0Sstevel@tonic-gate }
28*0Sstevel@tonic-gate 
set(const UnivCharsetDesc & desc)29*0Sstevel@tonic-gate void CharsetInfo::set(const UnivCharsetDesc &desc)
30*0Sstevel@tonic-gate {
31*0Sstevel@tonic-gate   desc_ = desc;
32*0Sstevel@tonic-gate   init();
33*0Sstevel@tonic-gate }
34*0Sstevel@tonic-gate 
init()35*0Sstevel@tonic-gate void CharsetInfo::init()
36*0Sstevel@tonic-gate {
37*0Sstevel@tonic-gate   inverse_.setAll(Unsigned32(-1));
38*0Sstevel@tonic-gate 
39*0Sstevel@tonic-gate   UnivCharsetDescIter iter(desc_);
40*0Sstevel@tonic-gate 
41*0Sstevel@tonic-gate   WideChar descMin, descMax;
42*0Sstevel@tonic-gate   UnivChar univMin;
43*0Sstevel@tonic-gate   while (iter.next(descMin, descMax, univMin)) {
44*0Sstevel@tonic-gate     if (univMin <= charMax) {
45*0Sstevel@tonic-gate       Char univMax;
46*0Sstevel@tonic-gate       if (charMax - univMin < descMax - descMin)
47*0Sstevel@tonic-gate 	univMax = charMax;
48*0Sstevel@tonic-gate       else
49*0Sstevel@tonic-gate 	univMax = univMin + (descMax - descMin);
50*0Sstevel@tonic-gate       Unsigned32 diff
51*0Sstevel@tonic-gate 	= ((descMin - univMin) & ((Unsigned32(1) << 31) - 1));
52*0Sstevel@tonic-gate       for (;;) {
53*0Sstevel@tonic-gate 	Char max;
54*0Sstevel@tonic-gate 	Unsigned32 n = inverse_.getRange(univMin, max);
55*0Sstevel@tonic-gate 	if (max > univMax)
56*0Sstevel@tonic-gate 	  max = univMax;
57*0Sstevel@tonic-gate 	if (n == Unsigned32(-1))
58*0Sstevel@tonic-gate 	  inverse_.setRange(univMin, max, diff);
59*0Sstevel@tonic-gate 	else if (n != Unsigned32(-2))
60*0Sstevel@tonic-gate 	  inverse_.setRange(univMin, max, Unsigned32(-2));
61*0Sstevel@tonic-gate 	if (max == univMax)
62*0Sstevel@tonic-gate 	  break;
63*0Sstevel@tonic-gate 	univMin = max + 1;
64*0Sstevel@tonic-gate       }
65*0Sstevel@tonic-gate     }
66*0Sstevel@tonic-gate   }
67*0Sstevel@tonic-gate   // These are the characters that the ANSI C
68*0Sstevel@tonic-gate   // standard guarantees will be in the basic execution
69*0Sstevel@tonic-gate   // character set.
70*0Sstevel@tonic-gate   static char execChars[] =
71*0Sstevel@tonic-gate     "\t\n\r "
72*0Sstevel@tonic-gate     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
73*0Sstevel@tonic-gate     "abcdefghijklmnopqrstuvwxyz"
74*0Sstevel@tonic-gate     "0123456789"
75*0Sstevel@tonic-gate     "!\"#%&'()*+,-./:"
76*0Sstevel@tonic-gate     ";<=>?[\\]^_{|}~";
77*0Sstevel@tonic-gate   // These are the corresponding ISO 646 codes.
78*0Sstevel@tonic-gate   static char univCodes[] = {
79*0Sstevel@tonic-gate     9, 10, 13, 32,
80*0Sstevel@tonic-gate     65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
81*0Sstevel@tonic-gate     78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
82*0Sstevel@tonic-gate     97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
83*0Sstevel@tonic-gate     110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
84*0Sstevel@tonic-gate     48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
85*0Sstevel@tonic-gate     33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 58,
86*0Sstevel@tonic-gate     59, 60, 61, 62, 63, 91, 92, 93, 94, 95, 123, 124, 125, 126,
87*0Sstevel@tonic-gate   };
88*0Sstevel@tonic-gate   for (size_t i = 0; execChars[i] != '\0'; i++) {
89*0Sstevel@tonic-gate     WideChar c;
90*0Sstevel@tonic-gate     ISet<WideChar> set;
91*0Sstevel@tonic-gate     if (univToDesc(univCodes[i], c, set) > 0 && c <= Char(-1))
92*0Sstevel@tonic-gate       execToDesc_[(unsigned char)execChars[i]] = Char(c);
93*0Sstevel@tonic-gate   }
94*0Sstevel@tonic-gate }
95*0Sstevel@tonic-gate 
getDescSet(ISet<Char> & set) const96*0Sstevel@tonic-gate void CharsetInfo::getDescSet(ISet<Char> &set) const
97*0Sstevel@tonic-gate {
98*0Sstevel@tonic-gate   UnivCharsetDescIter iter(desc_);
99*0Sstevel@tonic-gate   WideChar descMin, descMax;
100*0Sstevel@tonic-gate   UnivChar univMin;
101*0Sstevel@tonic-gate   while (iter.next(descMin, descMax, univMin)) {
102*0Sstevel@tonic-gate     if (descMin > charMax)
103*0Sstevel@tonic-gate       break;
104*0Sstevel@tonic-gate     if (descMax > charMax)
105*0Sstevel@tonic-gate       descMax = charMax;
106*0Sstevel@tonic-gate     set.addRange(Char(descMin), Char(descMax));
107*0Sstevel@tonic-gate   }
108*0Sstevel@tonic-gate }
109*0Sstevel@tonic-gate 
digitWeight(Char c) const110*0Sstevel@tonic-gate int CharsetInfo::digitWeight(Char c) const
111*0Sstevel@tonic-gate {
112*0Sstevel@tonic-gate   for (int i = 0; i < 10; i++)
113*0Sstevel@tonic-gate     if (c == execToDesc('0' + i))
114*0Sstevel@tonic-gate       return i;
115*0Sstevel@tonic-gate   return -1;
116*0Sstevel@tonic-gate }
117*0Sstevel@tonic-gate 
hexDigitWeight(Char c) const118*0Sstevel@tonic-gate int CharsetInfo::hexDigitWeight(Char c) const
119*0Sstevel@tonic-gate {
120*0Sstevel@tonic-gate   for (int i = 0; i < 10; i++)
121*0Sstevel@tonic-gate     if (c == execToDesc('0' + i))
122*0Sstevel@tonic-gate       return i;
123*0Sstevel@tonic-gate   for (int i = 0; i < 6; i++)
124*0Sstevel@tonic-gate     if (c == execToDesc('a' + i) || c == execToDesc('A' + i))
125*0Sstevel@tonic-gate       return i + 10;
126*0Sstevel@tonic-gate   return -1;
127*0Sstevel@tonic-gate }
128*0Sstevel@tonic-gate 
execToDesc(const char * s) const129*0Sstevel@tonic-gate StringC CharsetInfo::execToDesc(const char *s) const
130*0Sstevel@tonic-gate {
131*0Sstevel@tonic-gate   StringC result;
132*0Sstevel@tonic-gate   while (*s != '\0')
133*0Sstevel@tonic-gate     result += execToDesc(*s++);
134*0Sstevel@tonic-gate   return result;
135*0Sstevel@tonic-gate }
136*0Sstevel@tonic-gate 
137*0Sstevel@tonic-gate #ifdef SP_NAMESPACE
138*0Sstevel@tonic-gate }
139*0Sstevel@tonic-gate #endif
140