1 // Copyright (c) 1994 James Clark
2 // See the file COPYING for copying permission.
3 #pragma ident	"%Z%%M%	%I%	%E% SMI"
4 
5 #include "splib.h"
6 
7 #ifdef SP_MULTI_BYTE
8 
9 #include "UnicodeCodingSystem.h"
10 #include "macros.h"
11 #include "Owner.h"
12 
13 #include <stddef.h>
14 #include <string.h>
15 #ifdef DECLARE_MEMMOVE
16 extern "C" {
17   void *memmove(void *, const void *, size_t);
18 }
19 #endif
20 
21 #ifdef SP_NAMESPACE
22 namespace SP_NAMESPACE {
23 #endif
24 
// U+FEFF as it appears when the first two input bytes are loaded into an
// unsigned short in this machine's byte order.
const unsigned short byteOrderMark = 0xFEFF;
// The same mark seen with its two bytes exchanged; the decoder treats this
// as a signal that every following unit must be byte-swapped.
const unsigned short swappedByteOrderMark = 0xFFFE;
27 
28 class UnicodeDecoder : public Decoder {
29 public:
30   UnicodeDecoder(const InputCodingSystem *sub);
31   size_t decode(Char *to, const char *from, size_t fromLen,
32 		const char **rest);
33   Boolean convertOffset(unsigned long &offset) const;
34 private:
35   PackedBoolean hadFirstChar_;
36   PackedBoolean hadByteOrderMark_;
37   PackedBoolean swapBytes_;
38   Owner<Decoder> subDecoder_;
39   const InputCodingSystem *subCodingSystem_;
40 };
41 
42 class UnicodeEncoder : public Encoder {
43 public:
44   UnicodeEncoder();
45   ~UnicodeEncoder();
46   void output(Char *, size_t, OutputByteStream *);
47   void output(const Char *, size_t, OutputByteStream *);
48   void startFile(OutputByteStream *);
49 private:
50   void allocBuf(size_t);
51   unsigned short *buf_;
52   size_t bufSize_;
53 };
54 
UnicodeCodingSystem(const InputCodingSystem * sub)55 UnicodeCodingSystem::UnicodeCodingSystem(const InputCodingSystem *sub)
56 : sub_(sub)
57 {
58 }
59 
makeDecoder() const60 Decoder *UnicodeCodingSystem::makeDecoder() const
61 {
62   return new UnicodeDecoder(sub_);
63 }
64 
makeEncoder() const65 Encoder *UnicodeCodingSystem::makeEncoder() const
66 {
67   return new UnicodeEncoder;
68 }
69 
fixedBytesPerChar() const70 unsigned UnicodeCodingSystem::fixedBytesPerChar() const
71 {
72   return 2;
73 }
74 
UnicodeDecoder(const InputCodingSystem * subCodingSystem)75 UnicodeDecoder::UnicodeDecoder(const InputCodingSystem *subCodingSystem)
76 : Decoder(subCodingSystem ? 1 : 2), subCodingSystem_(subCodingSystem),
77   hadByteOrderMark_(0), hadFirstChar_(0), swapBytes_(0)
78 {
79 }
80 
81 
decode(Char * to,const char * from,size_t fromLen,const char ** rest)82 size_t UnicodeDecoder::decode(Char *to, const char *from, size_t fromLen,
83 			      const char **rest)
84 {
85   union U {
86     unsigned short word;
87     char bytes[2];
88   };
89 
90   if (subDecoder_)
91     return subDecoder_->decode(to, from, fromLen, rest);
92   if (!hadFirstChar_) {
93     if (fromLen < 2) {
94       *rest = from;
95       return 0;
96     }
97     hadFirstChar_ = 1;
98     minBytesPerChar_ = 2;
99     U u;
100     u.bytes[0] = from[0];
101     u.bytes[1] = from[1];
102     if (u.word == byteOrderMark) {
103       hadByteOrderMark_ = 1;
104       from += 2;
105       fromLen -= 2;
106     }
107     else if (u.word == swappedByteOrderMark) {
108       hadByteOrderMark_ = 1;
109       from += 2;
110       fromLen -= 2;
111       swapBytes_ = 1;
112     }
113     else if (subCodingSystem_) {
114       subDecoder_ = subCodingSystem_->makeDecoder();
115       minBytesPerChar_ = subDecoder_->minBytesPerChar();
116       return subDecoder_->decode(to, from, fromLen, rest);
117     }
118   }
119   fromLen &= ~1;
120   *rest = from + fromLen;
121   if (sizeof(Char) == 2) {
122     if (!swapBytes_) {
123       if (from != (char *)to)
124 	memmove(to, from, fromLen);
125       return fromLen/2;
126     }
127   }
128   if (swapBytes_) {
129     for (size_t n = fromLen; n > 0; n -= 2) {
130       U u;
131       u.bytes[1] = *from++;
132       u.bytes[0] = *from++;
133       *to++ = u.word;
134     }
135   }
136   else  {
137     for (size_t n = fromLen; n > 0; n -= 2) {
138       U u;
139       u.bytes[0] = *from++;
140       u.bytes[1] = *from++;
141       *to++ = u.word;
142     }
143   }
144   return fromLen/2;
145 }
146 
convertOffset(unsigned long & n) const147 Boolean UnicodeDecoder::convertOffset(unsigned long &n) const
148 {
149   if (subDecoder_)
150     return subDecoder_->convertOffset(n);
151   if (hadByteOrderMark_)
152     n += 1;
153   n *= 2;
154   return true;
155 }
156 
UnicodeEncoder()157 UnicodeEncoder::UnicodeEncoder()
158 : buf_(0), bufSize_(0)
159 {
160 }
161 
~UnicodeEncoder()162 UnicodeEncoder::~UnicodeEncoder()
163 {
164   delete [] buf_;
165 }
166 
allocBuf(size_t n)167 void UnicodeEncoder::allocBuf(size_t n)
168 {
169   if (bufSize_ < n) {
170     delete [] buf_;
171     buf_ = new unsigned short[bufSize_ = n];
172   }
173 }
174 
startFile(OutputByteStream * sb)175 void UnicodeEncoder::startFile(OutputByteStream *sb)
176 {
177   const unsigned short n = byteOrderMark;
178   sb->sputn((char *)&n, 2);
179 }
180 
output(Char * s,size_t n,OutputByteStream * sb)181 void UnicodeEncoder::output(Char *s, size_t n, OutputByteStream *sb)
182 {
183   if (sizeof(Char) == 2) {
184     sb->sputn((char *)s, n*2);
185     return;
186   }
187   ASSERT(sizeof(Char) >= 2);
188   unsigned short *p = (unsigned short *)s;
189   for (size_t i = 0; i < n; i++)
190     p[i] = s[i] & 0xffff;
191   sb->sputn((char *)s, n*2);
192 }
193 
output(const Char * s,size_t n,OutputByteStream * sb)194 void UnicodeEncoder::output(const Char *s, size_t n, OutputByteStream *sb)
195 {
196   if (sizeof(Char) == 2) {
197     sb->sputn((char *)s, n*2);
198     return;
199   }
200   allocBuf(n);
201   for (size_t i = 0; i < n; i++)
202     buf_[i] = s[i] & 0xffff;
203   sb->sputn((char *)buf_, n*2);
204 }
205 
206 #ifdef SP_NAMESPACE
207 }
208 #endif
209 
210 #else /* not SP_MULTI_BYTE */
211 
#ifndef __GNUG__
// With SP_MULTI_BYTE undefined nothing else in this file is compiled.
// Give non-GNU compilers one declaration so the translation unit is not
// empty (presumably because some of them reject or warn about that).
static char non_empty_translation_unit;
#endif
215 
216 #endif /* not SP_MULTI_BYTE */
217