1 // Copyright (c) 1994, 1997 James Clark
2 // See the file COPYING for copying permission.
3 #pragma ident	"%Z%%M%	%I%	%E% SMI"
4 
5 #ifndef UnivCharsetDesc_INCLUDED
6 #define UnivCharsetDesc_INCLUDED 1
7 #ifdef __GNUG__
8 #pragma interface
9 #endif
10 
11 #include <stddef.h>
12 #include "types.h"
13 #include "CharMap.h"
14 #include "RangeMap.h"
15 #include "Boolean.h"
16 #include "ISet.h"
17 #include "constant.h"
18 
19 #ifdef SP_NAMESPACE
20 namespace SP_NAMESPACE {
21 #endif
22 
23 class SP_API UnivCharsetDesc {
24 public:
25   struct SP_API Range {
26     WideChar descMin;
27     // Note that this is a count, as in the SGML declaration,
28     // rather than a maximum.
29     unsigned long count;
30     UnivChar univMin;
31   };
32   enum {
33     zero = 48,
34     A = 65,
35     a = 97,
36     tab = 9,
37     rs = 10,
38     re = 13,
39     space = 32,
40     exclamation = 33,
41     lessThan = 60,
42     greaterThan = 62
43     };
44   UnivCharsetDesc();
45   UnivCharsetDesc(const Range *, size_t);
46   void set(const Range *, size_t);
47   Boolean descToUniv(WideChar from, UnivChar &to) const;
48   Boolean descToUniv(WideChar from, UnivChar &to, WideChar &alsoMax) const;
49   // Return 0 for no matches, 1 for 1, 2 for more than 1
50   unsigned univToDesc(UnivChar from, WideChar &to, ISet<WideChar> &toSet)
51        const;
52   unsigned univToDesc(UnivChar from, WideChar &to, ISet<WideChar> &toSet,
53 		      WideChar &count)
54        const;
55   void addRange(WideChar descMin, WideChar descMax, UnivChar univMin);
56   void addBaseRange(const UnivCharsetDesc &baseSet,
57 		    WideChar descMin,
58 		    WideChar descMax,
59 		    WideChar baseMin,
60 		    ISet<WideChar> &baseMissing);
61 private:
noDesc(Unsigned32 n)62   static Boolean noDesc(Unsigned32 n) {
63     return (n & (unsigned(1) << 31));
64   }
extractChar(Unsigned32 n,Char ch)65   static UnivChar extractChar(Unsigned32 n, Char ch) {
66     return UnivChar((n + ch) & ((unsigned(1) << 31) - 1));
67   }
wrapChar(UnivChar univ,Char ch)68   static Unsigned32 wrapChar(UnivChar univ, Char ch) {
69     return Unsigned32((univ - ch) & ((unsigned(1) << 31) - 1));
70   }
71   // For characters <= charMax.<
72   CharMap<Unsigned32> charMap_;
73   // For characters > charMax.
74   RangeMap<WideChar,UnivChar> rangeMap_;
75   friend class UnivCharsetDescIter;
76 };
77 
78 class SP_API UnivCharsetDescIter {
79 public:
80   UnivCharsetDescIter(const UnivCharsetDesc &);
81   Boolean next(WideChar &descMin, WideChar &descMax, UnivChar &univMin);
82   void skipTo(WideChar);
83 private:
84   const CharMap<Unsigned32> *charMap_;
85   Char nextChar_;
86   Boolean doneCharMap_;
87   RangeMapIter<WideChar,UnivChar> rangeMapIter_;
88 };
89 
90 inline
descToUniv(WideChar from,UnivChar & to)91 Boolean UnivCharsetDesc::descToUniv(WideChar from, UnivChar &to) const
92 {
93   if (from > charMax) {
94     WideChar tem;
95     return rangeMap_.map(from, to, tem);
96   }
97   else {
98     Unsigned32 tem = charMap_[from];
99     if (noDesc(tem))
100       return 0;
101     else {
102       to = extractChar(tem, from);
103       return 1;
104     }
105   }
106 }
107 
108 inline
descToUniv(WideChar from,UnivChar & to,WideChar & alsoMax)109 Boolean UnivCharsetDesc::descToUniv(WideChar from, UnivChar &to,
110 				    WideChar &alsoMax) const
111 {
112   if (from > charMax)
113     return rangeMap_.map(from, to, alsoMax);
114   else {
115     Char max;
116     Unsigned32 tem = charMap_.getRange(from, max);
117     alsoMax = max;
118     if (noDesc(tem))
119       return 0;
120     else {
121       to = extractChar(tem, from);
122       return 1;
123     }
124   }
125 }
126 
127 inline
univToDesc(UnivChar from,WideChar & to,ISet<WideChar> & toSet)128 unsigned UnivCharsetDesc::univToDesc(UnivChar from, WideChar &to,
129 				     ISet<WideChar> &toSet) const
130 {
131   WideChar tem;
132   return univToDesc(from, to, toSet, tem);
133 }
134 
135 inline
skipTo(WideChar ch)136 void UnivCharsetDescIter::skipTo(WideChar ch)
137 {
138   if (ch > charMax)
139     doneCharMap_ = 1;
140   else
141     nextChar_ = ch;
142 }
143 
144 #ifdef SP_NAMESPACE
145 }
146 #endif
147 
148 #endif /* not UnivCharsetDesc_INCLUDED */
149