1 // Copyright (c) 1994 James Clark
2 // See the file COPYING for copying permission.
3 #pragma ident	"%Z%%M%	%I%	%E% SMI"
4 
5 #ifdef __GNUG__
6 #pragma implementation
7 #endif
8 #include "splib.h"
9 #include <stdlib.h>
10 #include "ContentToken.h"
11 #include "macros.h"
12 #include "ElementType.h"
13 #include "Vector.h"
14 #include "Dtd.h"
15 #include "MessageArg.h"
16 
17 #ifdef SP_NAMESPACE
18 namespace SP_NAMESPACE {
19 #endif
20 
AndModelGroup(NCVector<Owner<ContentToken>> & v,ContentToken::OccurrenceIndicator oi)21 AndModelGroup::AndModelGroup(NCVector<Owner<ContentToken> > &v,
22 			     ContentToken::OccurrenceIndicator oi)
23 : ModelGroup(v, oi)
24 {
25 }
26 
connector() const27 ModelGroup::Connector AndModelGroup::connector() const
28 {
29   return andConnector;
30 }
31 
OrModelGroup(NCVector<Owner<ContentToken>> & v,ContentToken::OccurrenceIndicator oi)32 OrModelGroup::OrModelGroup(NCVector<Owner<ContentToken> > &v,
33 			   ContentToken::OccurrenceIndicator oi)
34 : ModelGroup(v, oi)
35 {
36   setOrGroup();
37 }
38 
connector() const39 ModelGroup::Connector OrModelGroup::connector() const
40 {
41   return orConnector;
42 }
43 
44 
SeqModelGroup(NCVector<Owner<ContentToken>> & v,ContentToken::OccurrenceIndicator oi)45 SeqModelGroup::SeqModelGroup(NCVector<Owner<ContentToken> > &v,
46 			     ContentToken::OccurrenceIndicator oi)
47 : ModelGroup(v, oi)
48 {
49 }
50 
connector() const51 ModelGroup::Connector SeqModelGroup::connector() const
52 {
53   return seqConnector;
54 }
55 
56 
ModelGroup(NCVector<Owner<ContentToken>> & v,OccurrenceIndicator oi)57 ModelGroup::ModelGroup(NCVector<Owner<ContentToken> > &v,
58 		       OccurrenceIndicator oi)
59 : ContentToken(oi)
60 {
61   members_.swap(v);
62 }
63 
grpgtcnt() const64 unsigned long ModelGroup::grpgtcnt() const
65 {
66   unsigned long cnt = 1;
67   for (size_t i = 0; i < members_.size(); i++)
68     cnt += members_[i]->grpgtcnt();
69   return cnt;
70 }
71 
setOrGroup()72 void ModelGroup::setOrGroup()
73 {
74   for (size_t i = 0; i < members_.size(); i++)
75     members_[i]->setOrGroupMember();
76 }
77 
asModelGroup() const78 const ModelGroup *ModelGroup::asModelGroup() const
79 {
80   return this;
81 }
82 
ElementToken(const ElementType * element,OccurrenceIndicator oi)83 ElementToken::ElementToken(const ElementType *element, OccurrenceIndicator oi)
84 : LeafContentToken(element, oi)
85 {
86 }
87 
ContentToken(OccurrenceIndicator oi)88 ContentToken::ContentToken(OccurrenceIndicator oi)
89 : occurrenceIndicator_(oi)
90 {
91 }
92 
grpgtcnt() const93 unsigned long ContentToken::grpgtcnt() const
94 {
95   return 1;
96 }
97 
setOrGroupMember()98 void ContentToken::setOrGroupMember()
99 {
100 }
101 
asModelGroup() const102 const ModelGroup *ContentToken::asModelGroup() const
103 {
104   return 0;
105 }
106 
asLeafContentToken() const107 const LeafContentToken *ContentToken::asLeafContentToken() const
108 {
109   return 0;
110 }
111 
LeafContentToken(const ElementType * element,OccurrenceIndicator oi)112 LeafContentToken::LeafContentToken(const ElementType *element,
113 				   OccurrenceIndicator oi)
114 : element_(element), ContentToken(oi), isFinal_(0), orGroupMember_(0),
115   requiredIndex_(size_t(-1))
116 {
117 }
118 
isInitial() const119 Boolean LeafContentToken::isInitial() const
120 {
121   return 0;
122 }
123 
setOrGroupMember()124 void LeafContentToken::setOrGroupMember()
125 {
126   orGroupMember_ = 1;
127 }
128 
asLeafContentToken() const129 const LeafContentToken *LeafContentToken::asLeafContentToken() const
130 {
131   return this;
132 }
133 
PcdataToken()134 PcdataToken::PcdataToken()
135 : LeafContentToken(0, rep)
136 {
137 }
138 
InitialPseudoToken()139 InitialPseudoToken::InitialPseudoToken()
140 : LeafContentToken(0, none)
141 {
142 }
143 
isInitial() const144 Boolean InitialPseudoToken::isInitial() const
145 {
146   return 1;
147 }
148 
DataTagGroup(NCVector<Owner<ContentToken>> & vec,OccurrenceIndicator oi)149 DataTagGroup::DataTagGroup(NCVector<Owner<ContentToken> > &vec,
150 			   OccurrenceIndicator oi)
151 : SeqModelGroup(vec, oi)
152 {
153 }
154 
DataTagElementToken(const ElementType * element,Vector<Text> & templates,Text & paddingTemplate)155 DataTagElementToken::DataTagElementToken(const ElementType *element,
156 					 Vector<Text> &templates,
157 					 Text &paddingTemplate)
158 : ElementToken(element, ContentToken::none),
159   havePaddingTemplate_(1)
160 {
161   templates.swap(templates_);
162   paddingTemplate.swap(paddingTemplate_);
163 }
164 
DataTagElementToken(const ElementType * element,Vector<Text> & templates)165 DataTagElementToken::DataTagElementToken(const ElementType *element,
166 					 Vector<Text> &templates)
167 : ElementToken(element, ContentToken::none),
168   havePaddingTemplate_(0)
169 {
170   templates.swap(templates_);
171 }
172 
~ContentToken()173 ContentToken::~ContentToken()
174 {
175 }
176 
177 struct GroupInfo {
178   unsigned nextLeafIndex;
179   PackedBoolean containsPcdata;
180   unsigned andStateSize;
181   Vector<unsigned> nextTypeIndex;
182   GroupInfo(size_t);
183 };
184 
185 
GroupInfo(size_t nType)186 GroupInfo::GroupInfo(size_t nType)
187 : nextTypeIndex(nType, 0), nextLeafIndex(0), containsPcdata(0), andStateSize(0)
188 {
189 }
190 
CompiledModelGroup(Owner<ModelGroup> & modelGroup)191 CompiledModelGroup::CompiledModelGroup(Owner<ModelGroup> &modelGroup)
192 : modelGroup_(modelGroup.extract())
193 {
194 }
195 
compile(size_t nElementTypeIndex,Vector<ContentModelAmbiguity> & ambiguities,Boolean & pcdataUnreachable)196 void CompiledModelGroup::compile(size_t nElementTypeIndex,
197 				 Vector<ContentModelAmbiguity> &ambiguities,
198 				 Boolean &pcdataUnreachable)
199 {
200   FirstSet first;
201   LastSet last;
202   GroupInfo info(nElementTypeIndex);
203   modelGroup_->analyze(info, 0, 0, first, last);
204   for (unsigned i = 0; i < last.size(); i++)
205     last[i]->setFinal();
206   andStateSize_ = info.andStateSize;
207   containsPcdata_ = info.containsPcdata;
208   initial_ = new InitialPseudoToken;
209   LastSet initialSet(1);
210   initialSet[0] = initial_.pointer();
211   ContentToken::addTransitions(initialSet, first, 1, 0, 0);
212   if (modelGroup_->inherentlyOptional())
213     initial_->setFinal();
214   pcdataUnreachable = 0;
215   Vector<unsigned> minAndDepth(info.nextLeafIndex);
216   Vector<size_t> elementTransition(nElementTypeIndex);
217   initial_->finish(minAndDepth, elementTransition, ambiguities,
218 		   pcdataUnreachable);
219   modelGroup_->finish(minAndDepth, elementTransition, ambiguities,
220 		      pcdataUnreachable);
221   if (!containsPcdata_)
222     pcdataUnreachable = 0;
223 }
224 
finish(Vector<unsigned> & minAndDepth,Vector<size_t> & elementTransition,Vector<ContentModelAmbiguity> & ambiguities,Boolean & pcdataUnreachable)225 void ModelGroup::finish(Vector<unsigned> &minAndDepth,
226 			Vector<size_t> &elementTransition,
227 			Vector<ContentModelAmbiguity> &ambiguities,
228 			Boolean &pcdataUnreachable)
229 {
230   for (unsigned i = 0; i < nMembers(); i++)
231     member(i).finish(minAndDepth, elementTransition, ambiguities,
232 		     pcdataUnreachable);
233 }
234 
finish(Vector<unsigned> & minAndDepthVec,Vector<size_t> & elementTransitionVec,Vector<ContentModelAmbiguity> & ambiguities,Boolean & pcdataUnreachable)235 void LeafContentToken::finish(Vector<unsigned> &minAndDepthVec,
236 			      Vector<size_t> &elementTransitionVec,
237 			      Vector<ContentModelAmbiguity> &ambiguities,
238 			      Boolean &pcdataUnreachable)
239 {
240   if (andInfo_) {
241     andFinish(minAndDepthVec, elementTransitionVec, ambiguities,
242 	      pcdataUnreachable);
243     return;
244   }
245   Vector<size_t>::iterator elementTransition = elementTransitionVec.begin();
246   Vector<unsigned>::iterator minAndDepth = minAndDepthVec.begin();
247   minAndDepthVec.assign(minAndDepthVec.size(), unsigned(-1));
248   elementTransitionVec.assign(elementTransitionVec.size(), size_t(-1));
249   pcdataTransitionType_ = 0;
250   simplePcdataTransition_ = 0;
251   // follow_ is in decreasing order of andDepth because of how it's
252   // constructed.
253   size_t n = follow_.size();
254   Vector<LeafContentToken *>::iterator follow = follow_.begin();
255   size_t j = 0;
256   for (size_t i = 0; i < n; i++) {
257     unsigned &minDepth = minAndDepth[follow[i]->index()];
258     if (minDepth) {
259       minDepth = 0;
260       if (j != i)
261 	follow[j] = follow[i];
262       if (i == requiredIndex_)
263 	requiredIndex_ = j;
264       const ElementType *e = follow[i]->elementType();
265       unsigned ei;
266       if (e == 0) {
267 	if (follow[i]->andInfo_ == 0) {
268 	  simplePcdataTransition_ = follow[i];
269 	  pcdataTransitionType_ = 1;
270 	}
271 	else
272 	  pcdataTransitionType_ = 2;
273 	ei = 0;
274       }
275       else
276 	ei = e->index();
277       if (elementTransition[ei] != size_t(-1)) {
278 	const LeafContentToken *prev = follow[elementTransition[ei]];
279 	// This might not be true: consider (a & b?)*; after the
280 	// a there are two different ways to get to the same b,
281 	// with the same and depth.
282 	if (follow[i] != prev) {
283 	  ambiguities.resize(ambiguities.size() + 1);
284 	  ContentModelAmbiguity &a = ambiguities.back();
285 	  a.from = this;
286 	  a.to1 = prev;
287 	  a.to2 = follow[i];
288 	  a.andDepth = 0;
289 	}
290       }
291       elementTransition[ei] = j;
292       j++;
293     }
294   }
295   if (pcdataTransitionType_ == 0)
296     pcdataUnreachable = 1;
297   follow_.resize(j);
298 }
299 
andFinish(Vector<unsigned> & minAndDepthVec,Vector<size_t> & elementTransitionVec,Vector<ContentModelAmbiguity> & ambiguities,Boolean & pcdataUnreachable)300 void LeafContentToken::andFinish(Vector<unsigned> &minAndDepthVec,
301 				 Vector<size_t> &elementTransitionVec,
302 				 Vector<ContentModelAmbiguity> &ambiguities,
303 				 Boolean &pcdataUnreachable)
304 {
305   // Vector mapping element type index to index of leaf content token
306   // of that type to which there is a transition, which is the "worst"
307   // from the point of view of ambiguity.
308   Vector<size_t>::iterator elementTransition = elementTransitionVec.begin();
309   // Vector mapping index of leaf content token
310   // to minimum AND depth of transition to that token.
311   Vector<unsigned>::iterator minAndDepth = minAndDepthVec.begin();
312   minAndDepthVec.assign(minAndDepthVec.size(), unsigned(-1));
313   elementTransitionVec.assign(elementTransitionVec.size(), size_t(-1));
314   pcdataTransitionType_ = 0;
315   simplePcdataTransition_ = 0;
316   unsigned pcdataMinCovered = 0;
317 
318   // follow_ is in decreasing order of andDepth because of how it's
319   // constructed.
320   size_t n = follow_.size();
321   size_t j = 0;
322   Vector<Transition>::iterator andFollow = andInfo_->follow.begin();
323   for (size_t i = 0; i < n; i++) {
324     unsigned &minDepth = minAndDepth[follow_[i]->index()];
325     // ignore transitions to the same token with the same and depth.
326     if (andFollow[i].andDepth < minDepth) {
327       minDepth = andFollow[i].andDepth;
328       if (j != i) {
329 	follow_[j] = follow_[i];
330 	andFollow[j] = andFollow[i];
331       }
332       if (i == requiredIndex_)
333 	requiredIndex_ = j;
334       const ElementType *e = follow_[i]->elementType();
335       unsigned ei;
336       if (e == 0) {
337 	if (pcdataTransitionType_ == 0) {
338 	  const AndModelGroup *andAncestor = andInfo_->andAncestor;
339 	  unsigned groupIndex = andInfo_->andGroupIndex;
340 	  do {
341 	    Boolean hasNonNull = 0;
342 	    for (unsigned k = 0; k < andAncestor->nMembers(); k++)
343 	      if (k != groupIndex
344 		  && !andAncestor->member(k).inherentlyOptional()) {
345 		hasNonNull = 1;
346 		break;
347 	      }
348 	    if (hasNonNull) {
349 	      if (minDepth <= andAncestor->andDepth())
350 		pcdataUnreachable = 1;
351 	      break;
352 	    }
353 	    groupIndex = andAncestor->andGroupIndex();
354 	    andAncestor = andAncestor->andAncestor();
355 	  } while (andAncestor);
356 	  if (andFollow[i].isolated)
357 	    pcdataMinCovered = minDepth;
358 	  pcdataTransitionType_ = 2;
359 	}
360 	else {
361 	  if (pcdataMinCovered > minDepth + 1)
362 	    pcdataUnreachable = 1;
363 	  pcdataMinCovered = andFollow[i].isolated ? minDepth : 0;
364 	}
365 	ei = 0;
366       }
367       else
368 	ei = e->index();
369       // If we have transitions t1, t2, ... tN to tokens having
370       // the same element type, with
371       // and-depths d1, d2, ... dN, where d1 >= d2 >= ... >= dN,
372       // then there is an ambiguity unless
373       // d1 > d2 > ... > dN and t1, t2, ... , tN-1 are all isolated.
374       size_t previ = elementTransition[ei];
375       if (previ != size_t(-1)) {
376 	const LeafContentToken *prev = follow_[previ];
377 	// This might not be true: consider (a & b?)*; after the
378 	// a there are two different ways to get to the same b,
379 	// with the same and depth.
380 	if (follow_[i] != prev
381 	    && (andFollow[previ].andDepth == andFollow[i].andDepth
382 		|| !andFollow[previ].isolated)) {
383 	  ambiguities.resize(ambiguities.size() + 1);
384 	  ContentModelAmbiguity &a = ambiguities.back();
385 	  a.from = this;
386 	  a.to1 = prev;
387 	  a.to2 = follow_[i];
388 	  a.andDepth = andFollow[i].andDepth;
389 	}
390 	if (andFollow[previ].isolated)
391 	  elementTransition[ei] = j;
392       }
393       else
394 	elementTransition[ei] = j;
395       j++;
396     }
397   }
398   if (pcdataMinCovered > 0 || pcdataTransitionType_ == 0)
399     pcdataUnreachable = 1;
400   follow_.resize(j);
401   andInfo_->follow.resize(j);
402 }
403 
analyze(GroupInfo & info,const AndModelGroup * andAncestor,unsigned andGroupIndex,FirstSet & first,LastSet & last)404 void ContentToken::analyze(GroupInfo &info,
405 			   const AndModelGroup *andAncestor,
406 			   unsigned andGroupIndex,
407 			   FirstSet &first,
408 			   LastSet &last)
409 {
410   analyze1(info, andAncestor, andGroupIndex, first, last);
411   if (occurrenceIndicator_ & opt)
412     inherentlyOptional_ = 1;
413   if (inherentlyOptional_)
414     first.setNotRequired();
415   if (occurrenceIndicator_ & plus)
416     addTransitions(last, first, 0,
417 		   andIndex(andAncestor), andDepth(andAncestor));
418 }
419 
analyze1(GroupInfo & info,const AndModelGroup * andAncestor,unsigned andGroupIndex,FirstSet & first,LastSet & last)420 void LeafContentToken::analyze1(GroupInfo &info,
421 				const AndModelGroup *andAncestor,
422 				unsigned andGroupIndex,
423 				FirstSet &first,
424 				LastSet &last)
425 {
426   leafIndex_ = info.nextLeafIndex++;
427   typeIndex_ = info.nextTypeIndex[element_ ? element_->index() : 0]++;
428   if (andAncestor) {
429     andInfo_ = new AndInfo;
430     andInfo_->andAncestor = andAncestor;
431     andInfo_->andGroupIndex = andGroupIndex;
432   }
433   first.init(this);
434   last.assign(1, this);
435   inherentlyOptional_ = 0;
436 }
437 
analyze1(GroupInfo & info,const AndModelGroup * andAncestor,unsigned andGroupIndex,FirstSet & first,LastSet & last)438 void PcdataToken::analyze1(GroupInfo &info,
439 			   const AndModelGroup *andAncestor,
440 			   unsigned andGroupIndex,
441 			   FirstSet &first,
442 			   LastSet &last)
443 {
444   info.containsPcdata = 1;
445   LeafContentToken::analyze1(info, andAncestor, andGroupIndex, first, last);
446 }
447 
analyze1(GroupInfo & info,const AndModelGroup * andAncestor,unsigned andGroupIndex,FirstSet & first,LastSet & last)448 void OrModelGroup::analyze1(GroupInfo &info,
449 			    const AndModelGroup *andAncestor,
450 			    unsigned andGroupIndex,
451 			    FirstSet &first,
452 			    LastSet &last)
453 {
454   member(0).analyze(info, andAncestor, andGroupIndex, first, last);
455   first.setNotRequired();
456   inherentlyOptional_ = member(0).inherentlyOptional();
457   for (unsigned i = 1; i < nMembers(); i++) {
458     FirstSet tempFirst;
459     LastSet tempLast;
460     member(i).analyze(info, andAncestor, andGroupIndex, tempFirst, tempLast);
461     first.append(tempFirst);
462     first.setNotRequired();
463     last.append(tempLast);
464     inherentlyOptional_ |= member(i).inherentlyOptional();
465   }
466 }
467 
analyze1(GroupInfo & info,const AndModelGroup * andAncestor,unsigned andGroupIndex,FirstSet & first,LastSet & last)468 void SeqModelGroup::analyze1(GroupInfo &info,
469 			     const AndModelGroup *andAncestor,
470 			     unsigned andGroupIndex,
471 			     FirstSet &first,
472 			     LastSet &last)
473 {
474   member(0).analyze(info, andAncestor, andGroupIndex, first, last);
475   inherentlyOptional_ = member(0).inherentlyOptional();
476   for (unsigned i = 1; i < nMembers(); i++) {
477     FirstSet tempFirst;
478     LastSet tempLast;
479     member(i).analyze(info, andAncestor, andGroupIndex, tempFirst, tempLast);
480     addTransitions(last, tempFirst, 1,
481 		   andIndex(andAncestor), andDepth(andAncestor));
482     if (inherentlyOptional_)
483       first.append(tempFirst);
484     if (member(i).inherentlyOptional())
485       last.append(tempLast);
486     else
487       tempLast.swap(last);
488     inherentlyOptional_ &= member(i).inherentlyOptional();
489   }
490 }
491 
analyze1(GroupInfo & info,const AndModelGroup * andAncestor,unsigned andGroupIndex,FirstSet & first,LastSet & last)492 void AndModelGroup::analyze1(GroupInfo &info,
493 			     const AndModelGroup *andAncestor,
494 			     unsigned andGroupIndex,
495 			     FirstSet &first,
496 			     LastSet &last)
497 {
498   andDepth_ = ContentToken::andDepth(andAncestor);
499   andIndex_ = ContentToken::andIndex(andAncestor);
500   andAncestor_ = andAncestor;
501   andGroupIndex_ = andGroupIndex;
502   if (andIndex_ + nMembers() > info.andStateSize)
503     info.andStateSize = andIndex_ + nMembers();
504   Vector<FirstSet> firstVec(nMembers());
505   Vector<LastSet> lastVec(nMembers());
506   member(0).analyze(info, this, 0, firstVec[0], lastVec[0]);
507   first = firstVec[0];
508   first.setNotRequired();
509   last = lastVec[0];
510   inherentlyOptional_ = member(0).inherentlyOptional();
511   unsigned i;
512   for (i = 1; i < nMembers(); i++) {
513     member(i).analyze(info, this, i, firstVec[i], lastVec[i]);
514     first.append(firstVec[i]);
515     first.setNotRequired();
516     last.append(lastVec[i]);
517     inherentlyOptional_ &= member(i).inherentlyOptional();
518   }
519   for (i = 0; i < nMembers(); i++) {
520     for (unsigned j = 0; j < nMembers(); j++)
521       if (j != i)
522 	addTransitions(lastVec[i], firstVec[j], 0,
523 		       andIndex() + nMembers(),
524 		       andDepth() + 1,
525 		       !member(j).inherentlyOptional(),
526 		       andIndex() + j, andIndex() + i);
527   }
528 }
529 
addTransitions(const LastSet & from,const FirstSet & to,Boolean maybeRequired,unsigned andClearIndex,unsigned andDepth,Boolean isolated,unsigned requireClear,unsigned toSet)530 void ContentToken::addTransitions(const LastSet &from,
531 				  const FirstSet &to,
532 				  Boolean maybeRequired,
533 				  unsigned andClearIndex,
534 				  unsigned andDepth,
535 				  Boolean isolated,
536 				  unsigned requireClear,
537 				  unsigned toSet)
538 {
539   size_t length = from.size();
540   for (unsigned i = 0; i < length; i++)
541     from[i]->addTransitions(to,
542 			    maybeRequired,
543 			    andClearIndex,
544 			    andDepth,
545 			    isolated,
546 			    requireClear,
547 			    toSet);
548 }
549 
addTransitions(const FirstSet & to,Boolean maybeRequired,unsigned andClearIndex,unsigned andDepth,Boolean isolated,unsigned requireClear,unsigned toSet)550 void LeafContentToken::addTransitions(const FirstSet &to,
551 				      Boolean maybeRequired,
552 				      unsigned andClearIndex,
553 				      unsigned andDepth,
554 				      Boolean isolated,
555 				      unsigned requireClear,
556 				      unsigned toSet)
557 {
558   if (maybeRequired && to.requiredIndex() != size_t(-1)) {
559     ASSERT(requiredIndex_ == size_t(-1));
560     requiredIndex_ = to.requiredIndex() + follow_.size();
561   }
562   size_t length = follow_.size();
563   size_t n = to.size();
564   follow_.resize(length + n);
565   for (size_t i = 0; i < n; i++)
566     follow_[length + i] = to.token(i);
567   if (andInfo_) {
568     andInfo_->follow.resize(length + n);
569     for (size_t i = 0; i < n; i++) {
570       Transition &t = andInfo_->follow[length + i];
571       t.clearAndStateStartIndex = andClearIndex;
572       t.andDepth = andDepth;
573       t.isolated = isolated;
574       t.requireClear = requireClear;
575       t.toSet = toSet;
576     }
577   }
578 }
579 
AndState(unsigned n)580 AndState::AndState(unsigned n)
581 : v_(n, PackedBoolean(0)), clearFrom_(0)
582 {
583 }
584 
clearFrom1(unsigned i)585 void AndState::clearFrom1(unsigned i)
586 {
587   while (clearFrom_ > i)
588     v_[--clearFrom_] = 0;
589 }
590 
MatchState()591 MatchState::MatchState()
592 : andState_(0)
593 {
594 }
595 
MatchState(const CompiledModelGroup * model)596 MatchState::MatchState(const CompiledModelGroup *model)
597 : pos_(model ? model->initial() : 0),
598   andState_(model ? model->andStateSize() : 0),
599   minAndDepth_(0)
600 {
601 }
602 
invalidExclusion(const ElementType * e) const603 const LeafContentToken *MatchState::invalidExclusion(const ElementType *e)
604      const
605 {
606   const LeafContentToken *token = pos_->transitionToken(e, andState_,
607 							minAndDepth_);
608   if (token && !token->inherentlyOptional() && !token->orGroupMember())
609     return token;
610   else
611     return 0;
612 }
613 
operator ==(const MatchState & state) const614 Boolean MatchState::operator==(const MatchState &state) const
615 {
616   return (pos_ == state.pos_ && andState_ == state.andState_
617 	  && minAndDepth_ == state.minAndDepth_);
618 }
619 
operator ==(const AndState & state) const620 Boolean AndState::operator==(const AndState &state) const
621 {
622   ASSERT(v_.size() == state.v_.size());
623   for (size_t i = 0; i < v_.size(); i++) {
624     if (i >= clearFrom_ && i >= state.clearFrom_)
625       break;
626     if (v_[i] != state.v_[i])
627       return 0;
628   }
629   return 1;
630 }
631 
632 const LeafContentToken *
transitionToken(const ElementType * to,const AndState & andState,unsigned minAndDepth) const633 LeafContentToken::transitionToken(const ElementType *to,
634 				  const AndState &andState,
635 				  unsigned minAndDepth) const
636 {
637   Vector<LeafContentToken *>::const_iterator p = follow_.begin();
638   if (!andInfo_) {
639     for (size_t n = follow_.size(); n > 0; n--, p++)
640       if ((*p)->elementType() == to)
641 	return *p;
642   }
643   else {
644     Vector<Transition>::const_iterator q = andInfo_->follow.begin();
645     for (size_t n = follow_.size(); n > 0; n--, p++, q++)
646       if ((*p)->elementType() == to
647 	  && ((q->requireClear == unsigned(Transition::invalidIndex)
648 	       || andState.isClear(q->requireClear))
649 	      && q->andDepth >= minAndDepth))
650 	return (*p);
651   }
652   return 0;
653 }
654 
655 Boolean
tryTransition(const ElementType * to,AndState & andState,unsigned & minAndDepth,const LeafContentToken * & newpos) const656 LeafContentToken::tryTransition(const ElementType *to,
657 				AndState &andState,
658 				unsigned &minAndDepth,
659 				const LeafContentToken *&newpos) const
660 {
661   Vector<LeafContentToken *>::const_iterator p = follow_.begin();
662   if (!andInfo_) {
663     for (size_t n = follow_.size(); n > 0; n--, p++) {
664       if ((*p)->elementType() == to) {
665 	newpos = *p;
666 	minAndDepth = newpos->computeMinAndDepth(andState);
667 	return 1;
668       }
669     }
670   }
671   else {
672     Vector<Transition>::const_iterator q = andInfo_->follow.begin();
673     for (size_t n = follow_.size(); n > 0; n--, p++, q++) {
674     if ((*p)->elementType() == to
675 	&& ((q->requireClear == unsigned(Transition::invalidIndex)
676 	     || andState.isClear(q->requireClear))
677 	    && q->andDepth >= minAndDepth)) {
678 	if (q->toSet != unsigned(Transition::invalidIndex))
679 	  andState.set(q->toSet);
680 	andState.clearFrom(q->clearAndStateStartIndex);
681 	newpos = *p;
682 	minAndDepth = newpos->computeMinAndDepth(andState);
683 	return 1;
684       }
685     }
686   }
687   return 0;
688 }
689 
690 void
possibleTransitions(const AndState & andState,unsigned minAndDepth,Vector<const ElementType * > & v) const691 LeafContentToken::possibleTransitions(const AndState &andState,
692 				      unsigned minAndDepth,
693 				      Vector<const ElementType *> &v) const
694 {
695   Vector<LeafContentToken *>::const_iterator p = follow_.begin();
696   if (!andInfo_) {
697     for (size_t n = follow_.size(); n > 0; n--, p++)
698       v.push_back((*p)->elementType());
699   }
700   else {
701     Vector<Transition>::const_iterator q = andInfo_->follow.begin();
702     for (size_t n = follow_.size(); n > 0; n--, p++, q++)
703       if ((q->requireClear == unsigned(Transition::invalidIndex)
704 	   || andState.isClear(q->requireClear))
705 	&& q->andDepth >= minAndDepth)
706 	v.push_back((*p)->elementType());
707   }
708 }
709 
computeMinAndDepth1(const AndState & andState) const710 unsigned LeafContentToken::computeMinAndDepth1(const AndState &andState) const
711 {
712   ASSERT(andInfo_ != 0);
713   unsigned groupIndex = andInfo_->andGroupIndex;
714   for (const AndModelGroup *group = andInfo_->andAncestor;
715        group;
716        groupIndex = group->andGroupIndex(), group = group->andAncestor())
717     for (unsigned i = 0; i < group->nMembers(); i++)
718       if (i != groupIndex && !group->member(i).inherentlyOptional()
719 	  && andState.isClear(group->andIndex() + i))
720 	return group->andDepth() + 1;
721   return 0;
722 }
723 
724 const LeafContentToken *
impliedStartTag(const AndState & andState,unsigned minAndDepth) const725 LeafContentToken::impliedStartTag(const AndState &andState,
726 				  unsigned minAndDepth) const
727 {
728   if (requiredIndex_ != size_t(-1)) {
729     if (!andInfo_)
730       return follow_[requiredIndex_];
731     const Transition &t = andInfo_->follow[requiredIndex_];
732     if ((t.requireClear == unsigned(Transition::invalidIndex)
733 	 || andState.isClear(t.requireClear))
734 	&& t.andDepth >= minAndDepth)
735       return follow_[requiredIndex_];
736   }
737   return 0;
738 }
739 
doRequiredTransition(AndState & andState,unsigned & minAndDepth,const LeafContentToken * & newpos) const740 void LeafContentToken::doRequiredTransition(AndState &andState,
741 					    unsigned &minAndDepth,
742 					    const LeafContentToken *&newpos)
743      const
744 {
745   ASSERT(requiredIndex_ != size_t(-1));
746   if (andInfo_) {
747     const Transition &t = andInfo_->follow[requiredIndex_];
748     if (t.toSet != unsigned(Transition::invalidIndex))
749       andState.set(t.toSet);
750     andState.clearFrom(t.clearAndStateStartIndex);
751   }
752   newpos = follow_[requiredIndex_];
753   minAndDepth = newpos->computeMinAndDepth(andState);
754 }
755 
FirstSet()756 FirstSet::FirstSet()
757 : requiredIndex_(size_t(-1))
758 {
759 }
760 
init(LeafContentToken * p)761 void FirstSet::init(LeafContentToken *p)
762 {
763   v_.assign(1, p);
764   v_.reserve(256);
765   requiredIndex_ = 0;
766 }
767 
append(const FirstSet & set)768 void FirstSet::append(const FirstSet &set)
769 {
770   if (set.requiredIndex_ != size_t(-1)) {
771     ASSERT(requiredIndex_ == size_t(-1));
772     requiredIndex_ = set.requiredIndex_ + v_.size();
773   }
774   size_t oldSize = v_.size();
775   v_.resize(v_.size() + set.v_.size());
776   for (size_t i = 0; i < set.v_.size(); i++)
777     v_[oldSize + i] = set.v_[i];
778 }
779 
append(const LastSet & set)780 void LastSet::append(const LastSet &set)
781 {
782   size_t oldSize = size();
783   resize(size() + set.size());
784   for (size_t i = 0; i < set.size(); i++)
785     (*this)[oldSize + i] = set[i];
786 }
787 
788 #ifdef SP_NAMESPACE
789 }
790 #endif
791