1 // Copyright (c) 1994, 1997 James Clark
2 // See the file COPYING for copying permission.
3 #pragma ident "%Z%%M% %I% %E% SMI"
4
5 #ifdef __GNUG__
6 #pragma implementation
7 #endif
8 #include "splib.h"
9 #include "CharsetInfo.h"
10 #include "ISet.h"
11 #include "constant.h"
12
13 #ifdef SP_NAMESPACE
14 namespace SP_NAMESPACE {
15 #endif
16
CharsetInfo(const UnivCharsetDesc & desc)17 CharsetInfo::CharsetInfo(const UnivCharsetDesc &desc)
18 : desc_(desc)
19 {
20 // FIXME remove mappings from desc for characters greater charMax
21 init();
22 }
23
CharsetInfo()24 CharsetInfo::CharsetInfo()
25 {
26 inverse_.setAll(unsigned(-1));
27 }
28
set(const UnivCharsetDesc & desc)29 void CharsetInfo::set(const UnivCharsetDesc &desc)
30 {
31 desc_ = desc;
32 init();
33 }
34
init()35 void CharsetInfo::init()
36 {
37 inverse_.setAll(Unsigned32(-1));
38
39 UnivCharsetDescIter iter(desc_);
40
41 WideChar descMin, descMax;
42 UnivChar univMin;
43 while (iter.next(descMin, descMax, univMin)) {
44 if (univMin <= charMax) {
45 Char univMax;
46 if (charMax - univMin < descMax - descMin)
47 univMax = charMax;
48 else
49 univMax = univMin + (descMax - descMin);
50 Unsigned32 diff
51 = ((descMin - univMin) & ((Unsigned32(1) << 31) - 1));
52 for (;;) {
53 Char max;
54 Unsigned32 n = inverse_.getRange(univMin, max);
55 if (max > univMax)
56 max = univMax;
57 if (n == Unsigned32(-1))
58 inverse_.setRange(univMin, max, diff);
59 else if (n != Unsigned32(-2))
60 inverse_.setRange(univMin, max, Unsigned32(-2));
61 if (max == univMax)
62 break;
63 univMin = max + 1;
64 }
65 }
66 }
67 // These are the characters that the ANSI C
68 // standard guarantees will be in the basic execution
69 // character set.
70 static char execChars[] =
71 "\t\n\r "
72 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
73 "abcdefghijklmnopqrstuvwxyz"
74 "0123456789"
75 "!\"#%&'()*+,-./:"
76 ";<=>?[\\]^_{|}~";
77 // These are the corresponding ISO 646 codes.
78 static char univCodes[] = {
79 9, 10, 13, 32,
80 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
81 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
82 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
83 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
84 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
85 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 58,
86 59, 60, 61, 62, 63, 91, 92, 93, 94, 95, 123, 124, 125, 126,
87 };
88 for (size_t i = 0; execChars[i] != '\0'; i++) {
89 WideChar c;
90 ISet<WideChar> set;
91 if (univToDesc(univCodes[i], c, set) > 0 && c <= Char(-1))
92 execToDesc_[(unsigned char)execChars[i]] = Char(c);
93 }
94 }
95
getDescSet(ISet<Char> & set) const96 void CharsetInfo::getDescSet(ISet<Char> &set) const
97 {
98 UnivCharsetDescIter iter(desc_);
99 WideChar descMin, descMax;
100 UnivChar univMin;
101 while (iter.next(descMin, descMax, univMin)) {
102 if (descMin > charMax)
103 break;
104 if (descMax > charMax)
105 descMax = charMax;
106 set.addRange(Char(descMin), Char(descMax));
107 }
108 }
109
digitWeight(Char c) const110 int CharsetInfo::digitWeight(Char c) const
111 {
112 for (int i = 0; i < 10; i++)
113 if (c == execToDesc('0' + i))
114 return i;
115 return -1;
116 }
117
hexDigitWeight(Char c) const118 int CharsetInfo::hexDigitWeight(Char c) const
119 {
120 for (int i = 0; i < 10; i++)
121 if (c == execToDesc('0' + i))
122 return i;
123 for (int i = 0; i < 6; i++)
124 if (c == execToDesc('a' + i) || c == execToDesc('A' + i))
125 return i + 10;
126 return -1;
127 }
128
execToDesc(const char * s) const129 StringC CharsetInfo::execToDesc(const char *s) const
130 {
131 StringC result;
132 while (*s != '\0')
133 result += execToDesc(*s++);
134 return result;
135 }
136
137 #ifdef SP_NAMESPACE
138 }
139 #endif
140