// xref: /onnv-gate/usr/src/cmd/man/src/util/nsgmls.src/include/Syntax.h (revision 0:68f95e015346)
// Copyright (c) 1994 James Clark
// See the file COPYING for copying permission.
3 #pragma ident	"%Z%%M%	%I%	%E% SMI"
4 
5 #ifndef Syntax_INCLUDED
6 #define Syntax_INCLUDED 1
7 #ifdef __GNUG__
8 #pragma interface
9 #endif
10 
11 #include "types.h"
12 #include "Boolean.h"
13 #include "ISet.h"
14 #include "StringC.h"
15 #include "SubstTable.h"
16 #include "HashTable.h"
17 #include "Vector.h"
18 #include "Resource.h"
19 #include "XcharMap.h"
20 #include "EntityCatalog.h"
21 
22 #ifdef SP_NAMESPACE
23 namespace SP_NAMESPACE {
24 #endif
25 
26 class Sd;
27 class CharsetInfo;
28 
29 class SP_API Syntax : public Resource, public EntityCatalog::Syntax {
30 public:
31   enum ReservedName {
32     rALL,
33     rANY,
34     rATTLIST,
35     rCDATA,
36     rCONREF,
37     rCURRENT,
38     rDATA,
39     rDEFAULT,
40     rDOCTYPE,
41     rELEMENT,
42     rEMPTY,
43     rENDTAG,
44     rENTITIES,
45     rENTITY,
46     rFIXED,
47     rID,
48     rIDLINK,
49     rIDREF,
50     rIDREFS,
51     rIGNORE,
52     rIMPLICIT,
53     rIMPLIED,
54     rINCLUDE,
55     rINITIAL,
56     rLINK,
57     rLINKTYPE,
58     rMD,
59     rMS,
60     rNAME,
61     rNAMES,
62     rNDATA,
63     rNMTOKEN,
64     rNMTOKENS,
65     rNOTATION,
66     rNUMBER,
67     rNUMBERS,
68     rNUTOKEN,
69     rNUTOKENS,
70     rO,
71     rPCDATA,
72     rPI,
73     rPOSTLINK,
74     rPUBLIC,
75     rRCDATA,
76     rRE,
77     rREQUIRED,
78     rRESTORE,
79     rRS,
80     rSDATA,
81     rSHORTREF,
82     rSIMPLE,
83     rSPACE,
84     rSTARTTAG,
85     rSUBDOC,
86     rSYSTEM,
87     rTEMP,
88     rUSELINK,
89     rUSEMAP
90   };
91   enum { nNames = rUSEMAP + 1 };
92   enum Quantity {
93     qATTCNT,
94     qATTSPLEN,
95     qBSEQLEN,
96     qDTAGLEN,
97     qDTEMPLEN,
98     qENTLVL,
99     qGRPCNT,
100     qGRPGTCNT,
101     qGRPLVL,
102     qLITLEN,
103     qNAMELEN,
104     qNORMSEP,
105     qPILEN,
106     qTAGLEN,
107     qTAGLVL
108   };
109   enum { nQuantity = qTAGLVL + 1 };
110   enum { unlimited = 100000000 };
111   enum DelimGeneral {
112     dAND,
113     dCOM,
114     dCRO,
115     dDSC,
116     dDSO,
117     dDTGC,
118     dDTGO,
119     dERO,
120     dETAGO,
121     dGRPC,
122     dGRPO,
123     dHCRO, // WWW TC addition
124     dLIT,
125     dLITA,
126     dMDC,
127     dMDO,
128     dMINUS,
129     dMSC,
130     dNET,
131     dNESTC, // WWW TC addition
132     dOPT,
133     dOR,
134     dPERO,
135     dPIC,
136     dPIO,
137     dPLUS,
138     dREFC,
139     dREP,
140     dRNI,
141     dSEQ,
142     dSTAGO,
143     dTAGC,
144     dVI
145   };
146   enum { nDelimGeneral = dVI + 1 };
147   enum StandardFunction {
148     fRE,
149     fRS,
150     fSPACE
151   };
152   enum FunctionClass {
153     cFUNCHAR,
154     cSEPCHAR,
155     cMSOCHAR,
156     cMSICHAR,
157     cMSSCHAR
158   };
159   enum Set {
160     nameStart,
161     digit,
162     hexDigit,
163     nmchar,			// LCNMCHAR or UCNMCHAR
164     s,
165     blank,
166     sepchar,
167     minimumData,
168     significant,
169     functionChar,		// function character
170     sgmlChar
171   };
172   enum { nSet = sgmlChar + 1 };
173   enum Category {
174     otherCategory = 0,
175     sCategory = 01,
176     nameStartCategory = 02,
177     digitCategory = 04,
178     otherNameCategory = 010
179     };
180 
181   Syntax(const Sd &);
182   Boolean lookupFunctionChar(const StringC &, Char *) const;
183   Boolean charFunctionName(Char c, const StringC *&name) const;
184   Boolean lookupReservedName(const StringC &, ReservedName *) const;
185   const StringC &reservedName(ReservedName) const;
186   StringC rniReservedName(ReservedName) const;
187   Number quantity(Quantity) const;
188   Char standardFunction(int) const;
189   Boolean getStandardFunction(int, Char &) const;
190   const StringC &delim() const;
191   const ISet<Char> *charSet(int i) const;
192   const SubstTable<Char> *generalSubstTable() const;
193   const SubstTable<Char> *entitySubstTable() const;
194   const SubstTable<Char> &upperSubstTable() const;
195   Boolean namecaseGeneral() const;
196   Boolean namecaseEntity() const;
197   const StringC &peroDelim() const;
198   const StringC &delimGeneral(int) const;
199   const StringC &delimShortrefComplex(size_t) const;
200   const ISet<Char> &delimShortrefSimple() const;
201   int nDelimShortrefComplex() const;
202   Boolean isValidShortref(const StringC &) const;
203   Boolean hasShortrefs() const;
204   Boolean isNameCharacter(Xchar) const;
205   Boolean isNameStartCharacter(Xchar) const;
206   Boolean isDigit(Xchar) const;
207   Boolean isHexDigit(Xchar) const;
208   Boolean isS(Xchar) const;
209   Boolean isB(Xchar c) const;
210   Category charCategory(Xchar) const;
211   Boolean isSgmlChar(Xchar) const;
212   size_t attcnt() const;
213   size_t attsplen() const;
214   size_t namelen() const;
215   size_t penamelen() const;
216   size_t litlen() const;
217   size_t normsep() const;
218   size_t dtemplen() const;
219   size_t grpcnt() const;
220   size_t grpgtcnt() const;
221   size_t grplvl() const;
222   size_t taglvl() const;
223   size_t taglen() const;
224   size_t entlvl() const;
225   size_t pilen() const;
226   Char space() const;
227 
228   void setStandardFunction(StandardFunction, Char);
229   void enterStandardFunctionNames();
230   void addFunctionChar(const StringC &, FunctionClass, Char);
231   void setNamecaseGeneral(Boolean);
232   void setNamecaseEntity(Boolean);
233   void setDelimGeneral(int, const StringC &);
234   void addDelimShortref(const StringC &, const CharsetInfo &);
235   void addDelimShortrefs(const ISet<Char> &shortrefChars,
236 			 const CharsetInfo &charset);
237   void addNameCharacters(const ISet<Char> &);
238   void addNameStartCharacters(const ISet<Char> &);
239   void addSubst(Char lc, Char uc);
240   void addShunchar(Char);
241   void setShuncharControls();
242   void setQuantity(int, Number);
243   void setName(int, const StringC &);
244   void setSgmlChar(const ISet<Char> &);
245   void implySgmlChar(const Sd &);
246   // :: is for Watcom 10.0a
247   void checkSgmlChar(const Sd &,
248 		     const ::SP_NAMESPACE_SCOPE Syntax *otherSyntax,
249 		     Boolean invalidUseDocumentCharset,
250 		     ISet<WideChar> &invalid)
251        const;
252   static int referenceQuantity(Quantity);
253   const XcharMap<unsigned char> &markupScanTable() const;
254   Boolean multicode() const;
255   void addEntity(const StringC &, Char);
256   size_t nEntities() const;
257   const StringC &entityName(size_t) const;
258   Char entityChar(size_t) const;
259 private:
260   void subst(Char, Char);
261   void checkUnivControlChar(UnivChar univChar,
262 			    const CharsetInfo &docCharset,
263 			    const ::SP_NAMESPACE_SCOPE Syntax *otherSyntax,
264 			    ISet<WideChar> &invalid) const;
265 
266   ISet<Char> shunchar_;
267   PackedBoolean shuncharControls_;
268   ISet<Char> set_[nSet];
269   Char standardFunction_[3];
270   PackedBoolean standardFunctionValid_[3];
271   Boolean namecaseGeneral_;
272   Boolean namecaseEntity_;
273   StringC delimGeneral_[nDelimGeneral];
274   Vector<StringC> delimShortrefComplex_;
275   ISet<Char> delimShortrefSimple_;
276   StringC names_[nNames];
277   Number quantity_[nQuantity];
278   HashTable<StringC,int> nameTable_;
279   HashTable<StringC,Char> functionTable_;
280   SubstTable<Char> upperSubst_;
281   SubstTable<Char> identitySubst_;
282   const SubstTable<Char> *generalSubst_;
283   const SubstTable<Char> *entitySubst_;
284   XcharMap<unsigned char> categoryTable_;
285   Boolean multicode_;
286   XcharMap<unsigned char> markupScanTable_;
287   Vector<StringC> entityNames_;
288   StringC entityChars_;
289   static const int referenceQuantity_[];
290 };
291 
quantity(Quantity q)292 inline Number Syntax::quantity(Quantity q) const
293 {
294   return quantity_[q];
295 }
296 
setQuantity(int i,Number n)297 inline void Syntax::setQuantity(int i, Number n)
298 {
299   quantity_[i] = n;
300 }
301 
generalSubstTable()302 inline const SubstTable<Char> *Syntax::generalSubstTable() const
303 {
304   return generalSubst_;
305 }
306 
entitySubstTable()307 inline const SubstTable<Char> *Syntax::entitySubstTable() const
308 {
309   return entitySubst_;
310 }
311 
nDelimShortrefComplex()312 inline int Syntax::nDelimShortrefComplex() const
313 {
314   return int(delimShortrefComplex_.size());
315 }
316 
delimGeneral(int i)317 inline const StringC &Syntax::delimGeneral(int i) const
318 {
319   return delimGeneral_[i];
320 }
321 
delimShortrefComplex(size_t i)322 inline const StringC &Syntax::delimShortrefComplex(size_t i) const
323 {
324   return delimShortrefComplex_[i];
325 }
326 
delimShortrefSimple()327 inline const ISet<Char> &Syntax::delimShortrefSimple() const
328 {
329   return delimShortrefSimple_;
330 }
331 
hasShortrefs()332 inline Boolean Syntax::hasShortrefs() const
333 {
334   return delimShortrefComplex_.size() > 0 || !delimShortrefSimple_.isEmpty();
335 }
336 
standardFunction(int i)337 inline Char Syntax::standardFunction(int i) const
338 {
339   return standardFunction_[i];
340 }
341 
getStandardFunction(int i,Char & result)342 inline Boolean Syntax::getStandardFunction(int i, Char &result) const
343 {
344   if (standardFunctionValid_[i]) {
345     result = standardFunction_[i];
346     return 1;
347   }
348   else
349     return 0;
350 }
351 
charSet(int i)352 inline const ISet<Char> *Syntax::charSet(int i) const
353 {
354   return &set_[i];
355 }
356 
isNameCharacter(Xchar c)357 inline Boolean Syntax::isNameCharacter(Xchar c) const
358 {
359   return categoryTable_[c] >= nameStartCategory;
360 }
361 
isNameStartCharacter(Xchar c)362 inline Boolean Syntax::isNameStartCharacter(Xchar c) const
363 {
364   return categoryTable_[c] == nameStartCategory;
365 }
366 
isDigit(Xchar c)367 inline Boolean Syntax::isDigit(Xchar c) const
368 {
369   return categoryTable_[c] == digitCategory;
370 }
371 
isS(Xchar c)372 inline Boolean Syntax::isS(Xchar c) const
373 {
374   return categoryTable_[c] == sCategory;
375 }
376 
isB(Xchar c)377 inline Boolean Syntax::isB(Xchar c) const
378 {
379   return (categoryTable_[c] == sCategory
380 	  && !(standardFunctionValid_[fRE] && c == standardFunction_[fRE])
381 	  && !(standardFunctionValid_[fRS] && c == standardFunction_[fRS]));
382 }
383 
charCategory(Xchar c)384 inline Syntax::Category Syntax::charCategory(Xchar c) const
385 {
386   return Category(categoryTable_[c]);
387 }
388 
isSgmlChar(Xchar c)389 inline Boolean Syntax::isSgmlChar(Xchar c) const
390 {
391   return c >= 0 && set_[sgmlChar].contains(Char(c));
392 }
393 
reservedName(ReservedName i)394 inline const StringC &Syntax::reservedName(ReservedName i) const
395 {
396   return names_[i];
397 }
398 
attcnt()399 inline size_t Syntax::attcnt() const
400 {
401   return quantity(Syntax::qATTCNT);
402 }
403 
attsplen()404 inline size_t Syntax::attsplen() const
405 {
406   return quantity(Syntax::qATTSPLEN);
407 }
408 
namelen()409 inline size_t Syntax::namelen() const
410 {
411   return quantity(Syntax::qNAMELEN);
412 }
413 
penamelen()414 inline size_t Syntax::penamelen() const
415 {
416   return quantity(Syntax::qNAMELEN) - delimGeneral(Syntax::dPERO).size();
417 }
418 
litlen()419 inline size_t Syntax::litlen() const
420 {
421   return quantity(Syntax::qLITLEN);
422 }
423 
normsep()424 inline size_t Syntax::normsep() const
425 {
426   return quantity(Syntax::qNORMSEP);
427 }
428 
dtemplen()429 inline size_t Syntax::dtemplen() const
430 {
431   return quantity(Syntax::qDTEMPLEN);
432 }
433 
grpcnt()434 inline size_t Syntax::grpcnt() const
435 {
436   return quantity(Syntax::qGRPCNT);
437 }
438 
grpgtcnt()439 inline size_t Syntax::grpgtcnt() const
440 {
441   return quantity(Syntax::qGRPGTCNT);
442 }
443 
grplvl()444 inline size_t Syntax::grplvl() const
445 {
446   return quantity(Syntax::qGRPLVL);
447 }
448 
taglvl()449 inline size_t Syntax::taglvl() const
450 {
451   return quantity(Syntax::qTAGLVL);
452 }
453 
taglen()454 inline size_t Syntax::taglen() const
455 {
456   return quantity(Syntax::qTAGLEN);
457 }
458 
entlvl()459 inline size_t Syntax::entlvl() const
460 {
461   return quantity(Syntax::qENTLVL);
462 }
463 
pilen()464 inline size_t Syntax::pilen() const
465 {
466   return quantity(Syntax::qPILEN);
467 }
468 
space()469 inline Char Syntax::space() const
470 {
471   return standardFunction(Syntax::fSPACE);
472 }
473 
setSgmlChar(const ISet<Char> & set)474 inline void Syntax::setSgmlChar(const ISet<Char> &set)
475 {
476   set_[sgmlChar] = set;
477 }
478 
referenceQuantity(Quantity i)479 inline int Syntax::referenceQuantity(Quantity i)
480 {
481   return referenceQuantity_[i];
482 }
483 
setShuncharControls()484 inline void Syntax::setShuncharControls()
485 {
486   shuncharControls_ = 1;
487 }
488 
markupScanTable()489 inline const XcharMap<unsigned char> &Syntax::markupScanTable() const
490 {
491   return markupScanTable_;
492 }
493 
multicode()494 inline Boolean Syntax::multicode() const
495 {
496   return multicode_;
497 }
498 
namecaseGeneral()499 inline Boolean Syntax::namecaseGeneral() const
500 {
501   return namecaseGeneral_;
502 }
503 
namecaseEntity()504 inline Boolean Syntax::namecaseEntity() const
505 {
506   return namecaseEntity_;
507 }
508 
nEntities()509 inline size_t Syntax::nEntities() const
510 {
511   return entityNames_.size();
512 }
513 
entityName(size_t i)514 inline const StringC &Syntax::entityName(size_t i) const
515 {
516   return entityNames_[i];
517 }
518 
entityChar(size_t i)519 inline Char Syntax::entityChar(size_t i) const
520 {
521   return entityChars_[i];
522 }
523 
524 #ifdef SP_NAMESPACE
525 }
526 #endif
527 
528 #endif /* Syntax_INCLUDED */
529