// Copyright (c) 1994, 1997 James Clark
// See the file COPYING for copying permission.
3*0Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI"
4*0Sstevel@tonic-gate
5*0Sstevel@tonic-gate #ifdef __GNUG__
6*0Sstevel@tonic-gate #pragma implementation
7*0Sstevel@tonic-gate #endif
8*0Sstevel@tonic-gate #include "splib.h"
9*0Sstevel@tonic-gate #include "CharsetInfo.h"
10*0Sstevel@tonic-gate #include "ISet.h"
11*0Sstevel@tonic-gate #include "constant.h"
12*0Sstevel@tonic-gate
13*0Sstevel@tonic-gate #ifdef SP_NAMESPACE
14*0Sstevel@tonic-gate namespace SP_NAMESPACE {
15*0Sstevel@tonic-gate #endif
16*0Sstevel@tonic-gate
CharsetInfo(const UnivCharsetDesc & desc)17*0Sstevel@tonic-gate CharsetInfo::CharsetInfo(const UnivCharsetDesc &desc)
18*0Sstevel@tonic-gate : desc_(desc)
19*0Sstevel@tonic-gate {
20*0Sstevel@tonic-gate // FIXME remove mappings from desc for characters greater charMax
21*0Sstevel@tonic-gate init();
22*0Sstevel@tonic-gate }
23*0Sstevel@tonic-gate
CharsetInfo()24*0Sstevel@tonic-gate CharsetInfo::CharsetInfo()
25*0Sstevel@tonic-gate {
26*0Sstevel@tonic-gate inverse_.setAll(unsigned(-1));
27*0Sstevel@tonic-gate }
28*0Sstevel@tonic-gate
set(const UnivCharsetDesc & desc)29*0Sstevel@tonic-gate void CharsetInfo::set(const UnivCharsetDesc &desc)
30*0Sstevel@tonic-gate {
31*0Sstevel@tonic-gate desc_ = desc;
32*0Sstevel@tonic-gate init();
33*0Sstevel@tonic-gate }
34*0Sstevel@tonic-gate
init()35*0Sstevel@tonic-gate void CharsetInfo::init()
36*0Sstevel@tonic-gate {
37*0Sstevel@tonic-gate inverse_.setAll(Unsigned32(-1));
38*0Sstevel@tonic-gate
39*0Sstevel@tonic-gate UnivCharsetDescIter iter(desc_);
40*0Sstevel@tonic-gate
41*0Sstevel@tonic-gate WideChar descMin, descMax;
42*0Sstevel@tonic-gate UnivChar univMin;
43*0Sstevel@tonic-gate while (iter.next(descMin, descMax, univMin)) {
44*0Sstevel@tonic-gate if (univMin <= charMax) {
45*0Sstevel@tonic-gate Char univMax;
46*0Sstevel@tonic-gate if (charMax - univMin < descMax - descMin)
47*0Sstevel@tonic-gate univMax = charMax;
48*0Sstevel@tonic-gate else
49*0Sstevel@tonic-gate univMax = univMin + (descMax - descMin);
50*0Sstevel@tonic-gate Unsigned32 diff
51*0Sstevel@tonic-gate = ((descMin - univMin) & ((Unsigned32(1) << 31) - 1));
52*0Sstevel@tonic-gate for (;;) {
53*0Sstevel@tonic-gate Char max;
54*0Sstevel@tonic-gate Unsigned32 n = inverse_.getRange(univMin, max);
55*0Sstevel@tonic-gate if (max > univMax)
56*0Sstevel@tonic-gate max = univMax;
57*0Sstevel@tonic-gate if (n == Unsigned32(-1))
58*0Sstevel@tonic-gate inverse_.setRange(univMin, max, diff);
59*0Sstevel@tonic-gate else if (n != Unsigned32(-2))
60*0Sstevel@tonic-gate inverse_.setRange(univMin, max, Unsigned32(-2));
61*0Sstevel@tonic-gate if (max == univMax)
62*0Sstevel@tonic-gate break;
63*0Sstevel@tonic-gate univMin = max + 1;
64*0Sstevel@tonic-gate }
65*0Sstevel@tonic-gate }
66*0Sstevel@tonic-gate }
67*0Sstevel@tonic-gate // These are the characters that the ANSI C
68*0Sstevel@tonic-gate // standard guarantees will be in the basic execution
69*0Sstevel@tonic-gate // character set.
70*0Sstevel@tonic-gate static char execChars[] =
71*0Sstevel@tonic-gate "\t\n\r "
72*0Sstevel@tonic-gate "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
73*0Sstevel@tonic-gate "abcdefghijklmnopqrstuvwxyz"
74*0Sstevel@tonic-gate "0123456789"
75*0Sstevel@tonic-gate "!\"#%&'()*+,-./:"
76*0Sstevel@tonic-gate ";<=>?[\\]^_{|}~";
77*0Sstevel@tonic-gate // These are the corresponding ISO 646 codes.
78*0Sstevel@tonic-gate static char univCodes[] = {
79*0Sstevel@tonic-gate 9, 10, 13, 32,
80*0Sstevel@tonic-gate 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
81*0Sstevel@tonic-gate 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
82*0Sstevel@tonic-gate 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
83*0Sstevel@tonic-gate 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
84*0Sstevel@tonic-gate 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
85*0Sstevel@tonic-gate 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 58,
86*0Sstevel@tonic-gate 59, 60, 61, 62, 63, 91, 92, 93, 94, 95, 123, 124, 125, 126,
87*0Sstevel@tonic-gate };
88*0Sstevel@tonic-gate for (size_t i = 0; execChars[i] != '\0'; i++) {
89*0Sstevel@tonic-gate WideChar c;
90*0Sstevel@tonic-gate ISet<WideChar> set;
91*0Sstevel@tonic-gate if (univToDesc(univCodes[i], c, set) > 0 && c <= Char(-1))
92*0Sstevel@tonic-gate execToDesc_[(unsigned char)execChars[i]] = Char(c);
93*0Sstevel@tonic-gate }
94*0Sstevel@tonic-gate }
95*0Sstevel@tonic-gate
getDescSet(ISet<Char> & set) const96*0Sstevel@tonic-gate void CharsetInfo::getDescSet(ISet<Char> &set) const
97*0Sstevel@tonic-gate {
98*0Sstevel@tonic-gate UnivCharsetDescIter iter(desc_);
99*0Sstevel@tonic-gate WideChar descMin, descMax;
100*0Sstevel@tonic-gate UnivChar univMin;
101*0Sstevel@tonic-gate while (iter.next(descMin, descMax, univMin)) {
102*0Sstevel@tonic-gate if (descMin > charMax)
103*0Sstevel@tonic-gate break;
104*0Sstevel@tonic-gate if (descMax > charMax)
105*0Sstevel@tonic-gate descMax = charMax;
106*0Sstevel@tonic-gate set.addRange(Char(descMin), Char(descMax));
107*0Sstevel@tonic-gate }
108*0Sstevel@tonic-gate }
109*0Sstevel@tonic-gate
digitWeight(Char c) const110*0Sstevel@tonic-gate int CharsetInfo::digitWeight(Char c) const
111*0Sstevel@tonic-gate {
112*0Sstevel@tonic-gate for (int i = 0; i < 10; i++)
113*0Sstevel@tonic-gate if (c == execToDesc('0' + i))
114*0Sstevel@tonic-gate return i;
115*0Sstevel@tonic-gate return -1;
116*0Sstevel@tonic-gate }
117*0Sstevel@tonic-gate
hexDigitWeight(Char c) const118*0Sstevel@tonic-gate int CharsetInfo::hexDigitWeight(Char c) const
119*0Sstevel@tonic-gate {
120*0Sstevel@tonic-gate for (int i = 0; i < 10; i++)
121*0Sstevel@tonic-gate if (c == execToDesc('0' + i))
122*0Sstevel@tonic-gate return i;
123*0Sstevel@tonic-gate for (int i = 0; i < 6; i++)
124*0Sstevel@tonic-gate if (c == execToDesc('a' + i) || c == execToDesc('A' + i))
125*0Sstevel@tonic-gate return i + 10;
126*0Sstevel@tonic-gate return -1;
127*0Sstevel@tonic-gate }
128*0Sstevel@tonic-gate
execToDesc(const char * s) const129*0Sstevel@tonic-gate StringC CharsetInfo::execToDesc(const char *s) const
130*0Sstevel@tonic-gate {
131*0Sstevel@tonic-gate StringC result;
132*0Sstevel@tonic-gate while (*s != '\0')
133*0Sstevel@tonic-gate result += execToDesc(*s++);
134*0Sstevel@tonic-gate return result;
135*0Sstevel@tonic-gate }
136*0Sstevel@tonic-gate
137*0Sstevel@tonic-gate #ifdef SP_NAMESPACE
138*0Sstevel@tonic-gate }
139*0Sstevel@tonic-gate #endif
140