1 // Copyright (c) 1994 James Clark
2 // See the file COPYING for copying permission.
3 #pragma ident	"%Z%%M%	%I%	%E% SMI"
4 
5 #ifndef ContentToken_INCLUDED
6 #define ContentToken_INCLUDED 1
7 #ifdef __GNUG__
8 #pragma interface
9 #endif
10 
11 #include "Owner.h"
12 #include "Text.h"
13 #include "Vector.h"
14 #include "NCVector.h"
15 #include "Boolean.h"
16 
17 #ifdef SP_NAMESPACE
18 namespace SP_NAMESPACE {
19 #endif
20 
21 class LeafContentToken;
22 
23 struct SP_API Transition {
24   enum { invalidIndex = -1 };
25   // When performing this transition, reset all andState with index >= this.
26   unsigned clearAndStateStartIndex;
27   // This transition is possible only if all AND groups whose AND depth
28   // is >= this (and contains the LeafContentToken that this transition is
29   // from) have had all their non-nullable members matched.
30   unsigned andDepth;
31   // If this is 1, then this transition requires that the AND group
32   // whose AND depth is andDepth - 1 have a non-nullable member unmatched,
33   // and thus this transition is not ambiguous with a transition whose
34   // AND depth is < andDepth.
35   PackedBoolean isolated;
36   // Index in andState that must be clear for this transition to be
37   // allowed.
38   unsigned requireClear;
39   // Index in andState that is to be set after performing this transition.
40   unsigned toSet;
41 };
42 
43 class SP_API FirstSet {
44 public:
45   FirstSet();
46   void init(LeafContentToken *);
47   void append(const FirstSet &);
48   size_t size() const;
49   LeafContentToken *token(size_t i) const;
50   size_t requiredIndex() const;
51   void setNotRequired();
52 private:
53   Vector<LeafContentToken *> v_;
54   // index of contextually required token or -1 if none
55   size_t requiredIndex_;
56 };
57 
58 class SP_API LastSet : public Vector<LeafContentToken *> {
59 public:
LastSet()60   LastSet() { }
LastSet(size_t n)61   LastSet(size_t n) : Vector<LeafContentToken *>(n) { }
62   void append(const LastSet &);
63 };
64 
65 class ElementType;
66 class AndModelGroup;
67 struct GroupInfo;
68 
69 struct SP_API ContentModelAmbiguity {
70   const LeafContentToken *from;
71   const LeafContentToken *to1;
72   const LeafContentToken *to2;
73   unsigned andDepth;
74 };
75 
76 class ModelGroup;
77 
78 class SP_API ContentToken {
79 public:
80   enum OccurrenceIndicator { none = 0, opt = 01, plus = 02, rep = 03 };
81   ContentToken(OccurrenceIndicator);
82   virtual ~ContentToken();
83   OccurrenceIndicator occurrenceIndicator() const;
84   Boolean inherentlyOptional() const;
85   static unsigned andDepth(const AndModelGroup *);
86   static unsigned andIndex(const AndModelGroup *);
87   void analyze(GroupInfo &, const AndModelGroup *, unsigned,
88 	       FirstSet &, LastSet &);
89   static void addTransitions(const LastSet &from,
90 			     const FirstSet &to,
91 			     Boolean maybeRequired,
92 			     unsigned andClearIndex,
93 			     unsigned andDepth,
94 			     Boolean isolated = 0,
95 			     unsigned requireClear
96 			       = (unsigned)Transition::invalidIndex,
97 			     unsigned toSet
98 			       = (unsigned)Transition::invalidIndex);
99   virtual void finish(Vector<unsigned> &minAndDepth,
100 		      Vector<size_t> &elementTransition,
101 		      Vector<ContentModelAmbiguity> &,
102 		      Boolean &pcdataUnreachable) = 0;
103   virtual unsigned long grpgtcnt() const;
104   virtual void setOrGroupMember();
105   unsigned andGroupIndex() const;
106   virtual const ModelGroup *asModelGroup() const;
107   virtual const LeafContentToken *asLeafContentToken() const;
108 protected:
109   PackedBoolean inherentlyOptional_;
110 private:
111   ContentToken(const ContentToken &); // undefined
112   void operator=(const ContentToken &);	// undefined
113   virtual void analyze1(GroupInfo &, const AndModelGroup *, unsigned,
114 			FirstSet &, LastSet &) = 0;
115   OccurrenceIndicator occurrenceIndicator_;
116 };
117 
118 class SP_API ModelGroup : public ContentToken {
119 public:
120   enum Connector { andConnector, orConnector, seqConnector };
121   ModelGroup(NCVector<Owner<ContentToken> > &, OccurrenceIndicator);
122   virtual Connector connector() const = 0;
123   unsigned nMembers() const;
124   void finish(Vector<unsigned> &minAndDepth,
125 	      Vector<size_t> &elementTransition,
126 	      Vector<ContentModelAmbiguity> &,
127 	      Boolean &pcdataUnreachable);
128   ContentToken &member(unsigned i);
129   const ContentToken &member(unsigned i) const;
130   unsigned long grpgtcnt() const;
131   const ModelGroup *asModelGroup() const;
132 protected:
133   void setOrGroup();
134 private:
135   ModelGroup(const ModelGroup &); // undefined
136   void operator=(const ModelGroup &); // undefined
137   NCVector<Owner<ContentToken> > members_;
138 };
139 
140 class AndModelGroup : public ModelGroup {
141 public:
142   AndModelGroup(NCVector<Owner<ContentToken> > &, OccurrenceIndicator);
143   Connector connector() const;
144   unsigned andDepth() const;
145   unsigned andIndex() const;
146   unsigned andGroupIndex() const;
147   const AndModelGroup *andAncestor() const;
148 private:
149   AndModelGroup(const AndModelGroup &);	// undefined
150   void operator=(const AndModelGroup &); // undefined
151   unsigned andDepth_;		// number of and groups that contain this
152   unsigned andIndex_;
153   unsigned andGroupIndex_;
154   const AndModelGroup *andAncestor_;
155   void analyze1(GroupInfo &, const AndModelGroup *, unsigned,
156 		FirstSet &, LastSet &);
157 };
158 
159 class OrModelGroup : public ModelGroup {
160 public:
161   OrModelGroup(NCVector<Owner<ContentToken> > &, OccurrenceIndicator);
162   Connector connector() const;
163 private:
164   OrModelGroup(const OrModelGroup &); // undefined
165   void operator=(const OrModelGroup &);	// undefined
166   void analyze1(GroupInfo &, const AndModelGroup *, unsigned,
167 		FirstSet &, LastSet &);
168 };
169 
170 class SeqModelGroup : public ModelGroup {
171 public:
172   SeqModelGroup(NCVector<Owner<ContentToken> > &, OccurrenceIndicator);
173   Connector connector() const;
174 private:
175   SeqModelGroup(const SeqModelGroup &);	// undefined
176   void operator=(const SeqModelGroup &); // undefined
177   void analyze1(GroupInfo &, const AndModelGroup *, unsigned,
178 		FirstSet &, LastSet &);
179 };
180 
181 class AndState;
182 
183 class SP_API AndInfo {
184 public:
AndInfo()185   AndInfo() { }
186   const AndModelGroup *andAncestor;
187   unsigned andGroupIndex;
188   Vector<Transition> follow;
189 private:
190   AndInfo(const AndInfo &);	// undefined
191   void operator=(const AndInfo &); // undefined
192 };
193 
194 // A LeafContentToken is not quite the same as a primitive content token.
195 // A data tag group is a primitive content token but not a LeafContentToken.
196 
197 class SP_API LeafContentToken : public ContentToken {
198 public:
199   LeafContentToken(const ElementType *, OccurrenceIndicator);
200   unsigned index() const;
201   unsigned typeIndex() const;
202   const ElementType *elementType() const;
203   virtual Boolean isInitial() const;
204   void addTransitions(const FirstSet &to,
205 		      Boolean maybeRequired,
206 		      unsigned andClearIndex,
207 		      unsigned andDepth,
208 		      Boolean isolated,
209 		      unsigned requireClear,
210 		      unsigned toSet);
211   void setFinal();
212   void finish(Vector<unsigned> &minAndDepth,
213 	      Vector<size_t> &elementTransition,
214 	      Vector<ContentModelAmbiguity> &,
215 	      Boolean &pcdataUnreachable);
216   Boolean isFinal() const;
217   Boolean tryTransition(const ElementType *, AndState &,
218 			unsigned &minAndDepth,
219 			const LeafContentToken *&newpos) const;
220   Boolean tryTransitionPcdata(AndState &, unsigned &minAndDepth,
221 			      const LeafContentToken *&newpos) const;
222   void possibleTransitions(const AndState &, unsigned minAndDepth, Vector<const ElementType *> &) const;
223   const LeafContentToken *impliedStartTag(const AndState &andpos,
224 					  unsigned minAndDepth) const;
225   const LeafContentToken *transitionToken(const ElementType *to,
226 					  const AndState &andState,
227 					  unsigned minAndDepth) const;
228   void doRequiredTransition(AndState &andState,
229 			    unsigned &minAndDepth,
230 			    const LeafContentToken *&newpos) const;
231   unsigned computeMinAndDepth(const AndState&) const;
232   Boolean orGroupMember() const;
233   void setOrGroupMember();
234   const AndModelGroup *andAncestor() const;
235   unsigned andDepth() const;
236   const LeafContentToken *asLeafContentToken() const;
237 protected:
238   void analyze1(GroupInfo &, const AndModelGroup *, unsigned,
239 		FirstSet &, LastSet &);
240   const ElementType *element_;
241 private:
242   LeafContentToken(const LeafContentToken &); // undefined
243   void operator=(const LeafContentToken &);   // undefined
244   void andFinish(Vector<unsigned> &minAndDepth,
245 		 Vector<size_t> &elementTransition,
246 		 Vector<ContentModelAmbiguity> &,
247 		 Boolean &pcdataUnreachable);
248   unsigned computeMinAndDepth1(const AndState&) const;
249   unsigned leafIndex_;
250   unsigned typeIndex_;
251   Vector<LeafContentToken *> follow_;
252   PackedBoolean isFinal_;
253   PackedBoolean orGroupMember_;
254   // 0 none, 1 yes - simple, 2 - compled
255   char pcdataTransitionType_;
256   const LeafContentToken *simplePcdataTransition_;
257   size_t requiredIndex_;
258   Owner<AndInfo> andInfo_;
259 };
260 
261 class PcdataToken : public LeafContentToken {
262 public:
263   PcdataToken();
264   void analyze1(GroupInfo &, const AndModelGroup *, unsigned,
265 		FirstSet &, LastSet &);
266 private:
267   PcdataToken(const PcdataToken &); // undefined
268   void operator=(const PcdataToken &); // undefined
269 };
270 
271 class InitialPseudoToken : public LeafContentToken {
272 public:
273   InitialPseudoToken();
274   Boolean isInitial() const;
275 private:
276   InitialPseudoToken(const InitialPseudoToken &); // undefined
277   void operator=(const InitialPseudoToken &); // undefined
278 };
279 
280 class ElementToken : public LeafContentToken {
281 public:
282   ElementToken(const ElementType *, OccurrenceIndicator);
283 private:
284   ElementToken(const ElementToken &); // undefined
285   void operator=(const ElementToken &); // undefined
286 };
287 
288 class DataTagGroup : public SeqModelGroup {
289 public:
290   // first content token is a DataTagElementToken, second is PcdataToken
291   DataTagGroup(NCVector<Owner<ContentToken> > &, OccurrenceIndicator);
292 private:
293   DataTagGroup(const DataTagGroup &); // undefined
294   void operator=(const DataTagGroup &); // undefined
295 };
296 
297 class DataTagElementToken : public ElementToken {
298 public:
299   DataTagElementToken(const ElementType *, Vector<Text> &templates);
300   DataTagElementToken(const ElementType *, Vector<Text> &templates,
301 		      Text &paddingTemplate);
302 private:
303   DataTagElementToken(const DataTagElementToken &); // undefined
304   void operator=(const DataTagElementToken &); // undefined
305   Vector<Text> templates_;
306   Boolean havePaddingTemplate_;
307   Text paddingTemplate_;
308 };
309 
310 class SP_API CompiledModelGroup {
311 public:
312   CompiledModelGroup(Owner<ModelGroup> &);
313   void compile(size_t nElementTypeIndex,
314 	       Vector<ContentModelAmbiguity> &,
315 	       Boolean &pcdataUnreachable);
316   CompiledModelGroup *copy() const;
317   const LeafContentToken *initial() const;
318   unsigned andStateSize() const;
319   Boolean containsPcdata() const;
320   const ModelGroup *modelGroup() const;
321 private:
322   CompiledModelGroup(const CompiledModelGroup &); // undefined
323   void operator=(const CompiledModelGroup &);	  // undefined
324   Owner<ModelGroup> modelGroup_;
325   Owner<LeafContentToken> initial_;
326   unsigned andStateSize_;
327   Boolean containsPcdata_;
328 };
329 
330 class SP_API AndState {
331 public:
332   AndState(unsigned);
333   Boolean isClear(unsigned) const;
334   void clearFrom(unsigned);
335   void set(unsigned);
336   Boolean operator==(const AndState &) const;
337   Boolean operator!=(const AndState &) const;
338 private:
339   void clearFrom1(unsigned);
340   unsigned clearFrom_;
341   Vector<PackedBoolean> v_;
342 };
343 
344 class SP_API MatchState {
345 public:
346   MatchState();
347   MatchState(const CompiledModelGroup *); // may be 0
348   Boolean tryTransition(const ElementType *);
349   Boolean tryTransitionPcdata();
350   void possibleTransitions(Vector<const ElementType *> &) const;
351   Boolean isFinished() const;
352   const LeafContentToken *impliedStartTag() const;
353   const LeafContentToken *invalidExclusion(const ElementType *) const;
354   void doRequiredTransition();
355   const LeafContentToken *currentPosition() const;
356   Boolean operator==(const MatchState &) const;
357   Boolean operator!=(const MatchState &) const;
358 private:
359   const LeafContentToken *pos_;
360   AndState andState_;
361   unsigned minAndDepth_;
362 };
363 
364 inline
occurrenceIndicator()365 ContentToken::OccurrenceIndicator ContentToken::occurrenceIndicator() const
366 {
367   return occurrenceIndicator_;
368 }
369 
370 inline
index()371 unsigned LeafContentToken::index() const
372 {
373   return leafIndex_;
374 }
375 
376 inline
typeIndex()377 unsigned LeafContentToken::typeIndex() const
378 {
379   return typeIndex_;
380 }
381 
382 inline
inherentlyOptional()383 Boolean ContentToken::inherentlyOptional() const
384 {
385   return inherentlyOptional_;
386 }
387 
388 inline
elementType()389 const ElementType *LeafContentToken::elementType() const
390 {
391   return element_;
392 }
393 
394 inline
andDepth()395 unsigned AndModelGroup::andDepth() const
396 {
397   return andDepth_;
398 }
399 
400 inline
andIndex()401 unsigned AndModelGroup::andIndex() const
402 {
403   return andIndex_;
404 }
405 
406 inline
nMembers()407 unsigned ModelGroup::nMembers() const
408 {
409   return members_.size();
410 }
411 
412 inline
andDepth(const AndModelGroup * andAncestor)413 unsigned ContentToken::andDepth(const AndModelGroup *andAncestor)
414 {
415   return andAncestor ? andAncestor->andDepth() + 1 : 0;
416 }
417 
418 inline
andIndex(const AndModelGroup * andAncestor)419 unsigned ContentToken::andIndex(const AndModelGroup *andAncestor)
420 {
421   return (andAncestor
422 	  ? andAncestor->andIndex() + andAncestor->nMembers()
423 	  : 0);
424 }
425 
426 inline
member(unsigned i)427 ContentToken &ModelGroup::member(unsigned i)
428 {
429   return *members_[i];
430 }
431 
432 inline
member(unsigned i)433 const ContentToken &ModelGroup::member(unsigned i) const
434 {
435   return *members_[i];
436 }
437 
438 inline
setFinal()439 void LeafContentToken::setFinal()
440 {
441   isFinal_ = 1;
442 }
443 
444 inline
isFinal()445 Boolean LeafContentToken::isFinal() const
446 {
447   return isFinal_;
448 }
449 
450 inline
orGroupMember()451 Boolean LeafContentToken::orGroupMember() const
452 {
453   return orGroupMember_;
454 }
455 
456 inline
andStateSize()457 unsigned CompiledModelGroup::andStateSize() const
458 {
459   return andStateSize_;
460 }
461 
462 inline
containsPcdata()463 Boolean CompiledModelGroup::containsPcdata() const
464 {
465   return containsPcdata_;
466 }
467 
468 inline
andAncestor()469 const AndModelGroup *AndModelGroup::andAncestor() const
470 {
471   return andAncestor_;
472 }
473 
474 inline
andGroupIndex()475 unsigned AndModelGroup::andGroupIndex() const
476 {
477   return andGroupIndex_;
478 }
479 
480 inline
initial()481 const LeafContentToken *CompiledModelGroup::initial() const
482 {
483   return initial_.pointer();
484 }
485 
486 inline
modelGroup()487 const ModelGroup *CompiledModelGroup::modelGroup() const
488 {
489   return modelGroup_.pointer();
490 }
491 
492 inline
andAncestor()493 const AndModelGroup *LeafContentToken::andAncestor() const
494 {
495   return andInfo_ ? andInfo_->andAncestor : 0;
496 }
497 
498 inline
andDepth()499 unsigned LeafContentToken::andDepth() const
500 {
501   return andInfo_ ? ContentToken::andDepth(andInfo_->andAncestor) : 0;
502 }
503 
504 inline
computeMinAndDepth(const AndState & andState)505 unsigned LeafContentToken::computeMinAndDepth(const AndState &andState) const
506 {
507   return andInfo_ ? computeMinAndDepth1(andState) : 0;
508 }
509 
510 inline
tryTransitionPcdata(AndState & andState,unsigned & minAndDepth,const LeafContentToken * & newpos)511 Boolean LeafContentToken::tryTransitionPcdata(AndState &andState,
512 					      unsigned &minAndDepth,
513 					      const LeafContentToken *&newpos)
514      const
515 {
516   if (pcdataTransitionType_ == 1) {
517     newpos = simplePcdataTransition_;
518     return 1;
519   }
520   else if (pcdataTransitionType_ == 0)
521     return 0;
522   else
523     return tryTransition(0, andState, minAndDepth, newpos);
524 }
525 
526 inline
tryTransition(const ElementType * to)527 Boolean MatchState::tryTransition(const ElementType *to)
528 {
529   return pos_->tryTransition(to, andState_, minAndDepth_, pos_);
530 }
531 
532 inline
tryTransitionPcdata()533 Boolean MatchState::tryTransitionPcdata()
534 {
535   return pos_->tryTransitionPcdata(andState_, minAndDepth_, pos_);
536 }
537 
538 inline
possibleTransitions(Vector<const ElementType * > & v)539 void MatchState::possibleTransitions(Vector<const ElementType *> &v) const
540 {
541   pos_->possibleTransitions(andState_, minAndDepth_, v);
542 }
543 
544 inline
isFinished()545 Boolean MatchState::isFinished() const
546 {
547   return pos_->isFinal() && minAndDepth_ == 0;
548 }
549 
550 inline
551 const LeafContentToken *
impliedStartTag()552 MatchState::impliedStartTag() const
553 {
554   return pos_->impliedStartTag(andState_, minAndDepth_);
555 }
556 
557 inline
doRequiredTransition()558 void MatchState::doRequiredTransition()
559 {
560   pos_->doRequiredTransition(andState_, minAndDepth_, pos_);
561 }
562 
563 inline
currentPosition()564 const LeafContentToken *MatchState::currentPosition() const
565 {
566   return pos_;
567 }
568 
569 inline
570 Boolean MatchState::operator!=(const MatchState &state) const
571 {
572   return !(*this == state);
573 }
574 
575 inline
isClear(unsigned i)576 Boolean AndState::isClear(unsigned i) const
577 {
578   return v_[i] == 0;
579 }
580 
581 inline
set(unsigned i)582 void AndState::set(unsigned i)
583 {
584   v_[i] = 1;
585   if (i >= clearFrom_)
586     clearFrom_ = i + 1;
587 }
588 
589 inline
clearFrom(unsigned i)590 void AndState::clearFrom(unsigned i)
591 {
592   if (i < clearFrom_)
593     clearFrom1(i);
594 }
595 
596 inline
597 Boolean AndState::operator!=(const AndState &state) const
598 {
599   return !(*this == state);
600 }
601 
602 
603 inline
size()604 size_t FirstSet::size() const
605 {
606   return v_.size();
607 }
608 
609 inline
token(size_t i)610 LeafContentToken *FirstSet::token(size_t i) const
611 {
612   return v_[i];
613 }
614 
615 inline
requiredIndex()616 size_t FirstSet::requiredIndex() const
617 {
618   return requiredIndex_;
619 }
620 
621 inline
setNotRequired()622 void FirstSet::setNotRequired()
623 {
624   requiredIndex_ = size_t(-1);
625 }
626 
627 #ifdef SP_NAMESPACE
628 }
629 #endif
630 
631 #endif /* not ContentToken_INCLUDED */
632