// Copyright (c) 1994 James Clark
// See the file COPYING for copying permission.
3 #pragma ident "%Z%%M% %I% %E% SMI"
4
5 #ifndef Syntax_INCLUDED
6 #define Syntax_INCLUDED 1
7 #ifdef __GNUG__
8 #pragma interface
9 #endif
10
11 #include "types.h"
12 #include "Boolean.h"
13 #include "ISet.h"
14 #include "StringC.h"
15 #include "SubstTable.h"
16 #include "HashTable.h"
17 #include "Vector.h"
18 #include "Resource.h"
19 #include "XcharMap.h"
20 #include "EntityCatalog.h"
21
22 #ifdef SP_NAMESPACE
23 namespace SP_NAMESPACE {
24 #endif
25
26 class Sd;
27 class CharsetInfo;
28
29 class SP_API Syntax : public Resource, public EntityCatalog::Syntax {
30 public:
31 enum ReservedName {
32 rALL,
33 rANY,
34 rATTLIST,
35 rCDATA,
36 rCONREF,
37 rCURRENT,
38 rDATA,
39 rDEFAULT,
40 rDOCTYPE,
41 rELEMENT,
42 rEMPTY,
43 rENDTAG,
44 rENTITIES,
45 rENTITY,
46 rFIXED,
47 rID,
48 rIDLINK,
49 rIDREF,
50 rIDREFS,
51 rIGNORE,
52 rIMPLICIT,
53 rIMPLIED,
54 rINCLUDE,
55 rINITIAL,
56 rLINK,
57 rLINKTYPE,
58 rMD,
59 rMS,
60 rNAME,
61 rNAMES,
62 rNDATA,
63 rNMTOKEN,
64 rNMTOKENS,
65 rNOTATION,
66 rNUMBER,
67 rNUMBERS,
68 rNUTOKEN,
69 rNUTOKENS,
70 rO,
71 rPCDATA,
72 rPI,
73 rPOSTLINK,
74 rPUBLIC,
75 rRCDATA,
76 rRE,
77 rREQUIRED,
78 rRESTORE,
79 rRS,
80 rSDATA,
81 rSHORTREF,
82 rSIMPLE,
83 rSPACE,
84 rSTARTTAG,
85 rSUBDOC,
86 rSYSTEM,
87 rTEMP,
88 rUSELINK,
89 rUSEMAP
90 };
91 enum { nNames = rUSEMAP + 1 };
92 enum Quantity {
93 qATTCNT,
94 qATTSPLEN,
95 qBSEQLEN,
96 qDTAGLEN,
97 qDTEMPLEN,
98 qENTLVL,
99 qGRPCNT,
100 qGRPGTCNT,
101 qGRPLVL,
102 qLITLEN,
103 qNAMELEN,
104 qNORMSEP,
105 qPILEN,
106 qTAGLEN,
107 qTAGLVL
108 };
109 enum { nQuantity = qTAGLVL + 1 };
110 enum { unlimited = 100000000 };
111 enum DelimGeneral {
112 dAND,
113 dCOM,
114 dCRO,
115 dDSC,
116 dDSO,
117 dDTGC,
118 dDTGO,
119 dERO,
120 dETAGO,
121 dGRPC,
122 dGRPO,
123 dHCRO, // WWW TC addition
124 dLIT,
125 dLITA,
126 dMDC,
127 dMDO,
128 dMINUS,
129 dMSC,
130 dNET,
131 dNESTC, // WWW TC addition
132 dOPT,
133 dOR,
134 dPERO,
135 dPIC,
136 dPIO,
137 dPLUS,
138 dREFC,
139 dREP,
140 dRNI,
141 dSEQ,
142 dSTAGO,
143 dTAGC,
144 dVI
145 };
146 enum { nDelimGeneral = dVI + 1 };
147 enum StandardFunction {
148 fRE,
149 fRS,
150 fSPACE
151 };
152 enum FunctionClass {
153 cFUNCHAR,
154 cSEPCHAR,
155 cMSOCHAR,
156 cMSICHAR,
157 cMSSCHAR
158 };
159 enum Set {
160 nameStart,
161 digit,
162 hexDigit,
163 nmchar, // LCNMCHAR or UCNMCHAR
164 s,
165 blank,
166 sepchar,
167 minimumData,
168 significant,
169 functionChar, // function character
170 sgmlChar
171 };
172 enum { nSet = sgmlChar + 1 };
173 enum Category {
174 otherCategory = 0,
175 sCategory = 01,
176 nameStartCategory = 02,
177 digitCategory = 04,
178 otherNameCategory = 010
179 };
180
181 Syntax(const Sd &);
182 Boolean lookupFunctionChar(const StringC &, Char *) const;
183 Boolean charFunctionName(Char c, const StringC *&name) const;
184 Boolean lookupReservedName(const StringC &, ReservedName *) const;
185 const StringC &reservedName(ReservedName) const;
186 StringC rniReservedName(ReservedName) const;
187 Number quantity(Quantity) const;
188 Char standardFunction(int) const;
189 Boolean getStandardFunction(int, Char &) const;
190 const StringC &delim() const;
191 const ISet<Char> *charSet(int i) const;
192 const SubstTable<Char> *generalSubstTable() const;
193 const SubstTable<Char> *entitySubstTable() const;
194 const SubstTable<Char> &upperSubstTable() const;
195 Boolean namecaseGeneral() const;
196 Boolean namecaseEntity() const;
197 const StringC &peroDelim() const;
198 const StringC &delimGeneral(int) const;
199 const StringC &delimShortrefComplex(size_t) const;
200 const ISet<Char> &delimShortrefSimple() const;
201 int nDelimShortrefComplex() const;
202 Boolean isValidShortref(const StringC &) const;
203 Boolean hasShortrefs() const;
204 Boolean isNameCharacter(Xchar) const;
205 Boolean isNameStartCharacter(Xchar) const;
206 Boolean isDigit(Xchar) const;
207 Boolean isHexDigit(Xchar) const;
208 Boolean isS(Xchar) const;
209 Boolean isB(Xchar c) const;
210 Category charCategory(Xchar) const;
211 Boolean isSgmlChar(Xchar) const;
212 size_t attcnt() const;
213 size_t attsplen() const;
214 size_t namelen() const;
215 size_t penamelen() const;
216 size_t litlen() const;
217 size_t normsep() const;
218 size_t dtemplen() const;
219 size_t grpcnt() const;
220 size_t grpgtcnt() const;
221 size_t grplvl() const;
222 size_t taglvl() const;
223 size_t taglen() const;
224 size_t entlvl() const;
225 size_t pilen() const;
226 Char space() const;
227
228 void setStandardFunction(StandardFunction, Char);
229 void enterStandardFunctionNames();
230 void addFunctionChar(const StringC &, FunctionClass, Char);
231 void setNamecaseGeneral(Boolean);
232 void setNamecaseEntity(Boolean);
233 void setDelimGeneral(int, const StringC &);
234 void addDelimShortref(const StringC &, const CharsetInfo &);
235 void addDelimShortrefs(const ISet<Char> &shortrefChars,
236 const CharsetInfo &charset);
237 void addNameCharacters(const ISet<Char> &);
238 void addNameStartCharacters(const ISet<Char> &);
239 void addSubst(Char lc, Char uc);
240 void addShunchar(Char);
241 void setShuncharControls();
242 void setQuantity(int, Number);
243 void setName(int, const StringC &);
244 void setSgmlChar(const ISet<Char> &);
245 void implySgmlChar(const Sd &);
246 // :: is for Watcom 10.0a
247 void checkSgmlChar(const Sd &,
248 const ::SP_NAMESPACE_SCOPE Syntax *otherSyntax,
249 Boolean invalidUseDocumentCharset,
250 ISet<WideChar> &invalid)
251 const;
252 static int referenceQuantity(Quantity);
253 const XcharMap<unsigned char> &markupScanTable() const;
254 Boolean multicode() const;
255 void addEntity(const StringC &, Char);
256 size_t nEntities() const;
257 const StringC &entityName(size_t) const;
258 Char entityChar(size_t) const;
259 private:
260 void subst(Char, Char);
261 void checkUnivControlChar(UnivChar univChar,
262 const CharsetInfo &docCharset,
263 const ::SP_NAMESPACE_SCOPE Syntax *otherSyntax,
264 ISet<WideChar> &invalid) const;
265
266 ISet<Char> shunchar_;
267 PackedBoolean shuncharControls_;
268 ISet<Char> set_[nSet];
269 Char standardFunction_[3];
270 PackedBoolean standardFunctionValid_[3];
271 Boolean namecaseGeneral_;
272 Boolean namecaseEntity_;
273 StringC delimGeneral_[nDelimGeneral];
274 Vector<StringC> delimShortrefComplex_;
275 ISet<Char> delimShortrefSimple_;
276 StringC names_[nNames];
277 Number quantity_[nQuantity];
278 HashTable<StringC,int> nameTable_;
279 HashTable<StringC,Char> functionTable_;
280 SubstTable<Char> upperSubst_;
281 SubstTable<Char> identitySubst_;
282 const SubstTable<Char> *generalSubst_;
283 const SubstTable<Char> *entitySubst_;
284 XcharMap<unsigned char> categoryTable_;
285 Boolean multicode_;
286 XcharMap<unsigned char> markupScanTable_;
287 Vector<StringC> entityNames_;
288 StringC entityChars_;
289 static const int referenceQuantity_[];
290 };
291
quantity(Quantity q)292 inline Number Syntax::quantity(Quantity q) const
293 {
294 return quantity_[q];
295 }
296
setQuantity(int i,Number n)297 inline void Syntax::setQuantity(int i, Number n)
298 {
299 quantity_[i] = n;
300 }
301
generalSubstTable()302 inline const SubstTable<Char> *Syntax::generalSubstTable() const
303 {
304 return generalSubst_;
305 }
306
entitySubstTable()307 inline const SubstTable<Char> *Syntax::entitySubstTable() const
308 {
309 return entitySubst_;
310 }
311
nDelimShortrefComplex()312 inline int Syntax::nDelimShortrefComplex() const
313 {
314 return int(delimShortrefComplex_.size());
315 }
316
delimGeneral(int i)317 inline const StringC &Syntax::delimGeneral(int i) const
318 {
319 return delimGeneral_[i];
320 }
321
delimShortrefComplex(size_t i)322 inline const StringC &Syntax::delimShortrefComplex(size_t i) const
323 {
324 return delimShortrefComplex_[i];
325 }
326
delimShortrefSimple()327 inline const ISet<Char> &Syntax::delimShortrefSimple() const
328 {
329 return delimShortrefSimple_;
330 }
331
hasShortrefs()332 inline Boolean Syntax::hasShortrefs() const
333 {
334 return delimShortrefComplex_.size() > 0 || !delimShortrefSimple_.isEmpty();
335 }
336
standardFunction(int i)337 inline Char Syntax::standardFunction(int i) const
338 {
339 return standardFunction_[i];
340 }
341
getStandardFunction(int i,Char & result)342 inline Boolean Syntax::getStandardFunction(int i, Char &result) const
343 {
344 if (standardFunctionValid_[i]) {
345 result = standardFunction_[i];
346 return 1;
347 }
348 else
349 return 0;
350 }
351
charSet(int i)352 inline const ISet<Char> *Syntax::charSet(int i) const
353 {
354 return &set_[i];
355 }
356
isNameCharacter(Xchar c)357 inline Boolean Syntax::isNameCharacter(Xchar c) const
358 {
359 return categoryTable_[c] >= nameStartCategory;
360 }
361
isNameStartCharacter(Xchar c)362 inline Boolean Syntax::isNameStartCharacter(Xchar c) const
363 {
364 return categoryTable_[c] == nameStartCategory;
365 }
366
isDigit(Xchar c)367 inline Boolean Syntax::isDigit(Xchar c) const
368 {
369 return categoryTable_[c] == digitCategory;
370 }
371
isS(Xchar c)372 inline Boolean Syntax::isS(Xchar c) const
373 {
374 return categoryTable_[c] == sCategory;
375 }
376
isB(Xchar c)377 inline Boolean Syntax::isB(Xchar c) const
378 {
379 return (categoryTable_[c] == sCategory
380 && !(standardFunctionValid_[fRE] && c == standardFunction_[fRE])
381 && !(standardFunctionValid_[fRS] && c == standardFunction_[fRS]));
382 }
383
charCategory(Xchar c)384 inline Syntax::Category Syntax::charCategory(Xchar c) const
385 {
386 return Category(categoryTable_[c]);
387 }
388
isSgmlChar(Xchar c)389 inline Boolean Syntax::isSgmlChar(Xchar c) const
390 {
391 return c >= 0 && set_[sgmlChar].contains(Char(c));
392 }
393
reservedName(ReservedName i)394 inline const StringC &Syntax::reservedName(ReservedName i) const
395 {
396 return names_[i];
397 }
398
attcnt()399 inline size_t Syntax::attcnt() const
400 {
401 return quantity(Syntax::qATTCNT);
402 }
403
attsplen()404 inline size_t Syntax::attsplen() const
405 {
406 return quantity(Syntax::qATTSPLEN);
407 }
408
namelen()409 inline size_t Syntax::namelen() const
410 {
411 return quantity(Syntax::qNAMELEN);
412 }
413
penamelen()414 inline size_t Syntax::penamelen() const
415 {
416 return quantity(Syntax::qNAMELEN) - delimGeneral(Syntax::dPERO).size();
417 }
418
litlen()419 inline size_t Syntax::litlen() const
420 {
421 return quantity(Syntax::qLITLEN);
422 }
423
normsep()424 inline size_t Syntax::normsep() const
425 {
426 return quantity(Syntax::qNORMSEP);
427 }
428
dtemplen()429 inline size_t Syntax::dtemplen() const
430 {
431 return quantity(Syntax::qDTEMPLEN);
432 }
433
grpcnt()434 inline size_t Syntax::grpcnt() const
435 {
436 return quantity(Syntax::qGRPCNT);
437 }
438
grpgtcnt()439 inline size_t Syntax::grpgtcnt() const
440 {
441 return quantity(Syntax::qGRPGTCNT);
442 }
443
grplvl()444 inline size_t Syntax::grplvl() const
445 {
446 return quantity(Syntax::qGRPLVL);
447 }
448
taglvl()449 inline size_t Syntax::taglvl() const
450 {
451 return quantity(Syntax::qTAGLVL);
452 }
453
taglen()454 inline size_t Syntax::taglen() const
455 {
456 return quantity(Syntax::qTAGLEN);
457 }
458
entlvl()459 inline size_t Syntax::entlvl() const
460 {
461 return quantity(Syntax::qENTLVL);
462 }
463
pilen()464 inline size_t Syntax::pilen() const
465 {
466 return quantity(Syntax::qPILEN);
467 }
468
space()469 inline Char Syntax::space() const
470 {
471 return standardFunction(Syntax::fSPACE);
472 }
473
setSgmlChar(const ISet<Char> & set)474 inline void Syntax::setSgmlChar(const ISet<Char> &set)
475 {
476 set_[sgmlChar] = set;
477 }
478
referenceQuantity(Quantity i)479 inline int Syntax::referenceQuantity(Quantity i)
480 {
481 return referenceQuantity_[i];
482 }
483
setShuncharControls()484 inline void Syntax::setShuncharControls()
485 {
486 shuncharControls_ = 1;
487 }
488
markupScanTable()489 inline const XcharMap<unsigned char> &Syntax::markupScanTable() const
490 {
491 return markupScanTable_;
492 }
493
multicode()494 inline Boolean Syntax::multicode() const
495 {
496 return multicode_;
497 }
498
namecaseGeneral()499 inline Boolean Syntax::namecaseGeneral() const
500 {
501 return namecaseGeneral_;
502 }
503
namecaseEntity()504 inline Boolean Syntax::namecaseEntity() const
505 {
506 return namecaseEntity_;
507 }
508
nEntities()509 inline size_t Syntax::nEntities() const
510 {
511 return entityNames_.size();
512 }
513
entityName(size_t i)514 inline const StringC &Syntax::entityName(size_t i) const
515 {
516 return entityNames_[i];
517 }
518
entityChar(size_t i)519 inline Char Syntax::entityChar(size_t i) const
520 {
521 return entityChars_[i];
522 }
523
524 #ifdef SP_NAMESPACE
525 }
526 #endif
527
528 #endif /* Syntax_INCLUDED */
529