1 // Copyright (c) 1994 James Clark
2 // See the file COPYING for copying permission.
3 #pragma ident "%Z%%M% %I% %E% SMI"
4
5 #ifndef ContentToken_INCLUDED
6 #define ContentToken_INCLUDED 1
7 #ifdef __GNUG__
8 #pragma interface
9 #endif
10
11 #include "Owner.h"
12 #include "Text.h"
13 #include "Vector.h"
14 #include "NCVector.h"
15 #include "Boolean.h"
16
17 #ifdef SP_NAMESPACE
18 namespace SP_NAMESPACE {
19 #endif
20
21 class LeafContentToken;
22
23 struct SP_API Transition {
24 enum { invalidIndex = -1 };
25 // When performing this transition, reset all andState with index >= this.
26 unsigned clearAndStateStartIndex;
27 // This transition is possible only if all AND groups whose AND depth
28 // is >= this (and contains the LeafContentToken that this transition is
29 // from) have had all their non-nullable members matched.
30 unsigned andDepth;
31 // If this is 1, then this transition requires that the AND group
32 // whose AND depth is andDepth - 1 have a non-nullable member unmatched,
33 // and thus this transition is not ambiguous with a transition whose
34 // AND depth is < andDepth.
35 PackedBoolean isolated;
36 // Index in andState that must be clear for this transition to be
37 // allowed.
38 unsigned requireClear;
39 // Index in andState that is to be set after performing this transition.
40 unsigned toSet;
41 };
42
43 class SP_API FirstSet {
44 public:
45 FirstSet();
46 void init(LeafContentToken *);
47 void append(const FirstSet &);
48 size_t size() const;
49 LeafContentToken *token(size_t i) const;
50 size_t requiredIndex() const;
51 void setNotRequired();
52 private:
53 Vector<LeafContentToken *> v_;
54 // index of contextually required token or -1 if none
55 size_t requiredIndex_;
56 };
57
58 class SP_API LastSet : public Vector<LeafContentToken *> {
59 public:
LastSet()60 LastSet() { }
LastSet(size_t n)61 LastSet(size_t n) : Vector<LeafContentToken *>(n) { }
62 void append(const LastSet &);
63 };
64
65 class ElementType;
66 class AndModelGroup;
67 struct GroupInfo;
68
69 struct SP_API ContentModelAmbiguity {
70 const LeafContentToken *from;
71 const LeafContentToken *to1;
72 const LeafContentToken *to2;
73 unsigned andDepth;
74 };
75
76 class ModelGroup;
77
78 class SP_API ContentToken {
79 public:
80 enum OccurrenceIndicator { none = 0, opt = 01, plus = 02, rep = 03 };
81 ContentToken(OccurrenceIndicator);
82 virtual ~ContentToken();
83 OccurrenceIndicator occurrenceIndicator() const;
84 Boolean inherentlyOptional() const;
85 static unsigned andDepth(const AndModelGroup *);
86 static unsigned andIndex(const AndModelGroup *);
87 void analyze(GroupInfo &, const AndModelGroup *, unsigned,
88 FirstSet &, LastSet &);
89 static void addTransitions(const LastSet &from,
90 const FirstSet &to,
91 Boolean maybeRequired,
92 unsigned andClearIndex,
93 unsigned andDepth,
94 Boolean isolated = 0,
95 unsigned requireClear
96 = (unsigned)Transition::invalidIndex,
97 unsigned toSet
98 = (unsigned)Transition::invalidIndex);
99 virtual void finish(Vector<unsigned> &minAndDepth,
100 Vector<size_t> &elementTransition,
101 Vector<ContentModelAmbiguity> &,
102 Boolean &pcdataUnreachable) = 0;
103 virtual unsigned long grpgtcnt() const;
104 virtual void setOrGroupMember();
105 unsigned andGroupIndex() const;
106 virtual const ModelGroup *asModelGroup() const;
107 virtual const LeafContentToken *asLeafContentToken() const;
108 protected:
109 PackedBoolean inherentlyOptional_;
110 private:
111 ContentToken(const ContentToken &); // undefined
112 void operator=(const ContentToken &); // undefined
113 virtual void analyze1(GroupInfo &, const AndModelGroup *, unsigned,
114 FirstSet &, LastSet &) = 0;
115 OccurrenceIndicator occurrenceIndicator_;
116 };
117
118 class SP_API ModelGroup : public ContentToken {
119 public:
120 enum Connector { andConnector, orConnector, seqConnector };
121 ModelGroup(NCVector<Owner<ContentToken> > &, OccurrenceIndicator);
122 virtual Connector connector() const = 0;
123 unsigned nMembers() const;
124 void finish(Vector<unsigned> &minAndDepth,
125 Vector<size_t> &elementTransition,
126 Vector<ContentModelAmbiguity> &,
127 Boolean &pcdataUnreachable);
128 ContentToken &member(unsigned i);
129 const ContentToken &member(unsigned i) const;
130 unsigned long grpgtcnt() const;
131 const ModelGroup *asModelGroup() const;
132 protected:
133 void setOrGroup();
134 private:
135 ModelGroup(const ModelGroup &); // undefined
136 void operator=(const ModelGroup &); // undefined
137 NCVector<Owner<ContentToken> > members_;
138 };
139
140 class AndModelGroup : public ModelGroup {
141 public:
142 AndModelGroup(NCVector<Owner<ContentToken> > &, OccurrenceIndicator);
143 Connector connector() const;
144 unsigned andDepth() const;
145 unsigned andIndex() const;
146 unsigned andGroupIndex() const;
147 const AndModelGroup *andAncestor() const;
148 private:
149 AndModelGroup(const AndModelGroup &); // undefined
150 void operator=(const AndModelGroup &); // undefined
151 unsigned andDepth_; // number of and groups that contain this
152 unsigned andIndex_;
153 unsigned andGroupIndex_;
154 const AndModelGroup *andAncestor_;
155 void analyze1(GroupInfo &, const AndModelGroup *, unsigned,
156 FirstSet &, LastSet &);
157 };
158
159 class OrModelGroup : public ModelGroup {
160 public:
161 OrModelGroup(NCVector<Owner<ContentToken> > &, OccurrenceIndicator);
162 Connector connector() const;
163 private:
164 OrModelGroup(const OrModelGroup &); // undefined
165 void operator=(const OrModelGroup &); // undefined
166 void analyze1(GroupInfo &, const AndModelGroup *, unsigned,
167 FirstSet &, LastSet &);
168 };
169
170 class SeqModelGroup : public ModelGroup {
171 public:
172 SeqModelGroup(NCVector<Owner<ContentToken> > &, OccurrenceIndicator);
173 Connector connector() const;
174 private:
175 SeqModelGroup(const SeqModelGroup &); // undefined
176 void operator=(const SeqModelGroup &); // undefined
177 void analyze1(GroupInfo &, const AndModelGroup *, unsigned,
178 FirstSet &, LastSet &);
179 };
180
181 class AndState;
182
183 class SP_API AndInfo {
184 public:
AndInfo()185 AndInfo() { }
186 const AndModelGroup *andAncestor;
187 unsigned andGroupIndex;
188 Vector<Transition> follow;
189 private:
190 AndInfo(const AndInfo &); // undefined
191 void operator=(const AndInfo &); // undefined
192 };
193
194 // A LeafContentToken is not quite the same as a primitive content token.
195 // A data tag group is a primitive content token but not a LeafContentToken.
196
197 class SP_API LeafContentToken : public ContentToken {
198 public:
199 LeafContentToken(const ElementType *, OccurrenceIndicator);
200 unsigned index() const;
201 unsigned typeIndex() const;
202 const ElementType *elementType() const;
203 virtual Boolean isInitial() const;
204 void addTransitions(const FirstSet &to,
205 Boolean maybeRequired,
206 unsigned andClearIndex,
207 unsigned andDepth,
208 Boolean isolated,
209 unsigned requireClear,
210 unsigned toSet);
211 void setFinal();
212 void finish(Vector<unsigned> &minAndDepth,
213 Vector<size_t> &elementTransition,
214 Vector<ContentModelAmbiguity> &,
215 Boolean &pcdataUnreachable);
216 Boolean isFinal() const;
217 Boolean tryTransition(const ElementType *, AndState &,
218 unsigned &minAndDepth,
219 const LeafContentToken *&newpos) const;
220 Boolean tryTransitionPcdata(AndState &, unsigned &minAndDepth,
221 const LeafContentToken *&newpos) const;
222 void possibleTransitions(const AndState &, unsigned minAndDepth, Vector<const ElementType *> &) const;
223 const LeafContentToken *impliedStartTag(const AndState &andpos,
224 unsigned minAndDepth) const;
225 const LeafContentToken *transitionToken(const ElementType *to,
226 const AndState &andState,
227 unsigned minAndDepth) const;
228 void doRequiredTransition(AndState &andState,
229 unsigned &minAndDepth,
230 const LeafContentToken *&newpos) const;
231 unsigned computeMinAndDepth(const AndState&) const;
232 Boolean orGroupMember() const;
233 void setOrGroupMember();
234 const AndModelGroup *andAncestor() const;
235 unsigned andDepth() const;
236 const LeafContentToken *asLeafContentToken() const;
237 protected:
238 void analyze1(GroupInfo &, const AndModelGroup *, unsigned,
239 FirstSet &, LastSet &);
240 const ElementType *element_;
241 private:
242 LeafContentToken(const LeafContentToken &); // undefined
243 void operator=(const LeafContentToken &); // undefined
244 void andFinish(Vector<unsigned> &minAndDepth,
245 Vector<size_t> &elementTransition,
246 Vector<ContentModelAmbiguity> &,
247 Boolean &pcdataUnreachable);
248 unsigned computeMinAndDepth1(const AndState&) const;
249 unsigned leafIndex_;
250 unsigned typeIndex_;
251 Vector<LeafContentToken *> follow_;
252 PackedBoolean isFinal_;
253 PackedBoolean orGroupMember_;
254 // 0 none, 1 yes - simple, 2 - compled
255 char pcdataTransitionType_;
256 const LeafContentToken *simplePcdataTransition_;
257 size_t requiredIndex_;
258 Owner<AndInfo> andInfo_;
259 };
260
261 class PcdataToken : public LeafContentToken {
262 public:
263 PcdataToken();
264 void analyze1(GroupInfo &, const AndModelGroup *, unsigned,
265 FirstSet &, LastSet &);
266 private:
267 PcdataToken(const PcdataToken &); // undefined
268 void operator=(const PcdataToken &); // undefined
269 };
270
271 class InitialPseudoToken : public LeafContentToken {
272 public:
273 InitialPseudoToken();
274 Boolean isInitial() const;
275 private:
276 InitialPseudoToken(const InitialPseudoToken &); // undefined
277 void operator=(const InitialPseudoToken &); // undefined
278 };
279
280 class ElementToken : public LeafContentToken {
281 public:
282 ElementToken(const ElementType *, OccurrenceIndicator);
283 private:
284 ElementToken(const ElementToken &); // undefined
285 void operator=(const ElementToken &); // undefined
286 };
287
288 class DataTagGroup : public SeqModelGroup {
289 public:
290 // first content token is a DataTagElementToken, second is PcdataToken
291 DataTagGroup(NCVector<Owner<ContentToken> > &, OccurrenceIndicator);
292 private:
293 DataTagGroup(const DataTagGroup &); // undefined
294 void operator=(const DataTagGroup &); // undefined
295 };
296
297 class DataTagElementToken : public ElementToken {
298 public:
299 DataTagElementToken(const ElementType *, Vector<Text> &templates);
300 DataTagElementToken(const ElementType *, Vector<Text> &templates,
301 Text &paddingTemplate);
302 private:
303 DataTagElementToken(const DataTagElementToken &); // undefined
304 void operator=(const DataTagElementToken &); // undefined
305 Vector<Text> templates_;
306 Boolean havePaddingTemplate_;
307 Text paddingTemplate_;
308 };
309
310 class SP_API CompiledModelGroup {
311 public:
312 CompiledModelGroup(Owner<ModelGroup> &);
313 void compile(size_t nElementTypeIndex,
314 Vector<ContentModelAmbiguity> &,
315 Boolean &pcdataUnreachable);
316 CompiledModelGroup *copy() const;
317 const LeafContentToken *initial() const;
318 unsigned andStateSize() const;
319 Boolean containsPcdata() const;
320 const ModelGroup *modelGroup() const;
321 private:
322 CompiledModelGroup(const CompiledModelGroup &); // undefined
323 void operator=(const CompiledModelGroup &); // undefined
324 Owner<ModelGroup> modelGroup_;
325 Owner<LeafContentToken> initial_;
326 unsigned andStateSize_;
327 Boolean containsPcdata_;
328 };
329
330 class SP_API AndState {
331 public:
332 AndState(unsigned);
333 Boolean isClear(unsigned) const;
334 void clearFrom(unsigned);
335 void set(unsigned);
336 Boolean operator==(const AndState &) const;
337 Boolean operator!=(const AndState &) const;
338 private:
339 void clearFrom1(unsigned);
340 unsigned clearFrom_;
341 Vector<PackedBoolean> v_;
342 };
343
344 class SP_API MatchState {
345 public:
346 MatchState();
347 MatchState(const CompiledModelGroup *); // may be 0
348 Boolean tryTransition(const ElementType *);
349 Boolean tryTransitionPcdata();
350 void possibleTransitions(Vector<const ElementType *> &) const;
351 Boolean isFinished() const;
352 const LeafContentToken *impliedStartTag() const;
353 const LeafContentToken *invalidExclusion(const ElementType *) const;
354 void doRequiredTransition();
355 const LeafContentToken *currentPosition() const;
356 Boolean operator==(const MatchState &) const;
357 Boolean operator!=(const MatchState &) const;
358 private:
359 const LeafContentToken *pos_;
360 AndState andState_;
361 unsigned minAndDepth_;
362 };
363
364 inline
occurrenceIndicator()365 ContentToken::OccurrenceIndicator ContentToken::occurrenceIndicator() const
366 {
367 return occurrenceIndicator_;
368 }
369
370 inline
index()371 unsigned LeafContentToken::index() const
372 {
373 return leafIndex_;
374 }
375
376 inline
typeIndex()377 unsigned LeafContentToken::typeIndex() const
378 {
379 return typeIndex_;
380 }
381
382 inline
inherentlyOptional()383 Boolean ContentToken::inherentlyOptional() const
384 {
385 return inherentlyOptional_;
386 }
387
388 inline
elementType()389 const ElementType *LeafContentToken::elementType() const
390 {
391 return element_;
392 }
393
394 inline
andDepth()395 unsigned AndModelGroup::andDepth() const
396 {
397 return andDepth_;
398 }
399
400 inline
andIndex()401 unsigned AndModelGroup::andIndex() const
402 {
403 return andIndex_;
404 }
405
406 inline
nMembers()407 unsigned ModelGroup::nMembers() const
408 {
409 return members_.size();
410 }
411
412 inline
andDepth(const AndModelGroup * andAncestor)413 unsigned ContentToken::andDepth(const AndModelGroup *andAncestor)
414 {
415 return andAncestor ? andAncestor->andDepth() + 1 : 0;
416 }
417
418 inline
andIndex(const AndModelGroup * andAncestor)419 unsigned ContentToken::andIndex(const AndModelGroup *andAncestor)
420 {
421 return (andAncestor
422 ? andAncestor->andIndex() + andAncestor->nMembers()
423 : 0);
424 }
425
426 inline
member(unsigned i)427 ContentToken &ModelGroup::member(unsigned i)
428 {
429 return *members_[i];
430 }
431
432 inline
member(unsigned i)433 const ContentToken &ModelGroup::member(unsigned i) const
434 {
435 return *members_[i];
436 }
437
438 inline
setFinal()439 void LeafContentToken::setFinal()
440 {
441 isFinal_ = 1;
442 }
443
444 inline
isFinal()445 Boolean LeafContentToken::isFinal() const
446 {
447 return isFinal_;
448 }
449
450 inline
orGroupMember()451 Boolean LeafContentToken::orGroupMember() const
452 {
453 return orGroupMember_;
454 }
455
456 inline
andStateSize()457 unsigned CompiledModelGroup::andStateSize() const
458 {
459 return andStateSize_;
460 }
461
462 inline
containsPcdata()463 Boolean CompiledModelGroup::containsPcdata() const
464 {
465 return containsPcdata_;
466 }
467
468 inline
andAncestor()469 const AndModelGroup *AndModelGroup::andAncestor() const
470 {
471 return andAncestor_;
472 }
473
474 inline
andGroupIndex()475 unsigned AndModelGroup::andGroupIndex() const
476 {
477 return andGroupIndex_;
478 }
479
480 inline
initial()481 const LeafContentToken *CompiledModelGroup::initial() const
482 {
483 return initial_.pointer();
484 }
485
486 inline
modelGroup()487 const ModelGroup *CompiledModelGroup::modelGroup() const
488 {
489 return modelGroup_.pointer();
490 }
491
492 inline
andAncestor()493 const AndModelGroup *LeafContentToken::andAncestor() const
494 {
495 return andInfo_ ? andInfo_->andAncestor : 0;
496 }
497
498 inline
andDepth()499 unsigned LeafContentToken::andDepth() const
500 {
501 return andInfo_ ? ContentToken::andDepth(andInfo_->andAncestor) : 0;
502 }
503
504 inline
computeMinAndDepth(const AndState & andState)505 unsigned LeafContentToken::computeMinAndDepth(const AndState &andState) const
506 {
507 return andInfo_ ? computeMinAndDepth1(andState) : 0;
508 }
509
510 inline
tryTransitionPcdata(AndState & andState,unsigned & minAndDepth,const LeafContentToken * & newpos)511 Boolean LeafContentToken::tryTransitionPcdata(AndState &andState,
512 unsigned &minAndDepth,
513 const LeafContentToken *&newpos)
514 const
515 {
516 if (pcdataTransitionType_ == 1) {
517 newpos = simplePcdataTransition_;
518 return 1;
519 }
520 else if (pcdataTransitionType_ == 0)
521 return 0;
522 else
523 return tryTransition(0, andState, minAndDepth, newpos);
524 }
525
526 inline
tryTransition(const ElementType * to)527 Boolean MatchState::tryTransition(const ElementType *to)
528 {
529 return pos_->tryTransition(to, andState_, minAndDepth_, pos_);
530 }
531
532 inline
tryTransitionPcdata()533 Boolean MatchState::tryTransitionPcdata()
534 {
535 return pos_->tryTransitionPcdata(andState_, minAndDepth_, pos_);
536 }
537
538 inline
possibleTransitions(Vector<const ElementType * > & v)539 void MatchState::possibleTransitions(Vector<const ElementType *> &v) const
540 {
541 pos_->possibleTransitions(andState_, minAndDepth_, v);
542 }
543
544 inline
isFinished()545 Boolean MatchState::isFinished() const
546 {
547 return pos_->isFinal() && minAndDepth_ == 0;
548 }
549
550 inline
551 const LeafContentToken *
impliedStartTag()552 MatchState::impliedStartTag() const
553 {
554 return pos_->impliedStartTag(andState_, minAndDepth_);
555 }
556
557 inline
doRequiredTransition()558 void MatchState::doRequiredTransition()
559 {
560 pos_->doRequiredTransition(andState_, minAndDepth_, pos_);
561 }
562
563 inline
currentPosition()564 const LeafContentToken *MatchState::currentPosition() const
565 {
566 return pos_;
567 }
568
569 inline
570 Boolean MatchState::operator!=(const MatchState &state) const
571 {
572 return !(*this == state);
573 }
574
575 inline
isClear(unsigned i)576 Boolean AndState::isClear(unsigned i) const
577 {
578 return v_[i] == 0;
579 }
580
581 inline
set(unsigned i)582 void AndState::set(unsigned i)
583 {
584 v_[i] = 1;
585 if (i >= clearFrom_)
586 clearFrom_ = i + 1;
587 }
588
589 inline
clearFrom(unsigned i)590 void AndState::clearFrom(unsigned i)
591 {
592 if (i < clearFrom_)
593 clearFrom1(i);
594 }
595
596 inline
597 Boolean AndState::operator!=(const AndState &state) const
598 {
599 return !(*this == state);
600 }
601
602
603 inline
size()604 size_t FirstSet::size() const
605 {
606 return v_.size();
607 }
608
609 inline
token(size_t i)610 LeafContentToken *FirstSet::token(size_t i) const
611 {
612 return v_[i];
613 }
614
615 inline
requiredIndex()616 size_t FirstSet::requiredIndex() const
617 {
618 return requiredIndex_;
619 }
620
621 inline
setNotRequired()622 void FirstSet::setNotRequired()
623 {
624 requiredIndex_ = size_t(-1);
625 }
626
627 #ifdef SP_NAMESPACE
628 }
629 #endif
630
631 #endif /* not ContentToken_INCLUDED */
632