1 // Copyright (c) 1994 James Clark
2 // See the file COPYING for copying permission.
3 #pragma ident "%Z%%M% %I% %E% SMI"
4
5 #include "splib.h"
6 #include "Parser.h"
7 #include "Param.h"
8 #include "Group.h"
9 #include "Markup.h"
10 #include "ParserMessages.h"
11 #include "MessageArg.h"
12 #include "TokenMessageArg.h"
13 #include "token.h"
14 #include "macros.h"
15
16 #ifdef SP_NAMESPACE
17 namespace SP_NAMESPACE {
18 #endif
19
parseParam(const AllowedParams & allow,unsigned declInputLevel,Param & parm)20 Boolean Parser::parseParam(const AllowedParams &allow,
21 unsigned declInputLevel,
22 Param &parm)
23 {
24 for (;;) {
25 Token token = getToken(allow.mainMode());
26 switch (token) {
27 case tokenUnrecognized:
28 if (reportNonSgmlCharacter())
29 break;
30 {
31 message(ParserMessages::markupDeclarationCharacter,
32 StringMessageArg(currentToken()),
33 AllowedParamsMessageArg(allow, syntaxPointer()));
34 }
35 return 0;
36 case tokenEe:
37 if (inputLevel() <= declInputLevel) {
38 message(ParserMessages::declarationLevel);
39 return 0;
40 }
41 if (currentMarkup())
42 currentMarkup()->addEntityEnd();
43 popInputStack();
44 break;
45 case tokenCom:
46 if (!parseComment(comMode))
47 return 0;
48 if (options().warnPsComment)
49 message(ParserMessages::psComment);
50 break;
51 case tokenDso:
52 if (!allow.dso()) {
53 paramInvalidToken(tokenDso, allow);
54 return 0;
55 }
56 if (currentMarkup())
57 currentMarkup()->addDelim(Syntax::dDSO);
58 parm.type = Param::dso;
59 return 1;
60 case tokenGrpo:
61 if (currentMarkup())
62 currentMarkup()->addDelim(Syntax::dGRPO);
63 switch (allow.group()) {
64 case Param::invalid:
65 paramInvalidToken(tokenGrpo, allow);
66 return 0;
67 case Param::modelGroup:
68 {
69 ModelGroup *group;
70 if (!parseModelGroup(1, declInputLevel, group, grpsufMode))
71 return 0;
72 parm.type = Param::modelGroup;
73 parm.modelGroupPtr = group;
74 }
75 break;
76 case Param::nameGroup:
77 if (!parseNameGroup(declInputLevel, parm))
78 return 0;
79 break;
80 case Param::nameTokenGroup:
81 if (!parseNameTokenGroup(declInputLevel, parm))
82 return 0;
83 break;
84 default:
85 CANNOT_HAPPEN();
86 }
87 parm.type = allow.group();
88 return 1;
89 case tokenLita:
90 case tokenLit:
91 parm.type = allow.literal();
92 parm.lita = token == tokenLita;
93 switch (allow.literal()) {
94 case Param::invalid:
95 paramInvalidToken(token, allow);
96 return 0;
97 case Param::minimumLiteral:
98 if (!parseMinimumLiteral(parm.lita, parm.literalText))
99 return 0;
100 break;
101 case Param::attributeValueLiteral:
102 if (!parseAttributeValueLiteral(parm.lita, parm.literalText))
103 return 0;
104 break;
105 case Param::tokenizedAttributeValueLiteral:
106 if (!parseTokenizedAttributeValueLiteral(parm.lita, parm.literalText))
107 return 0;
108 break;
109 case Param::systemIdentifier:
110 if (!parseSystemIdentifier(parm.lita, parm.literalText))
111 return 0;
112 break;
113 case Param::paramLiteral:
114 if (!parseParameterLiteral(parm.lita, parm.literalText))
115 return 0;
116 break;
117 }
118 if (currentMarkup())
119 currentMarkup()->addLiteral(parm.literalText);
120 return 1;
121 case tokenMdc:
122 if (!allow.mdc()) {
123 paramInvalidToken(tokenMdc, allow);
124 return 0;
125 }
126 if (inputLevel() > declInputLevel)
127 message(ParserMessages::parameterEntityNotEnded);
128 if (currentMarkup())
129 currentMarkup()->addDelim(Syntax::dMDC);
130 parm.type = Param::mdc;
131 return 1;
132 case tokenMinus:
133 parm.type = Param::minus;
134 if (currentMarkup())
135 currentMarkup()->addDelim(Syntax::dMINUS);
136 return 1;
137 case tokenMinusGrpo:
138 if (!allow.exclusions()) {
139 paramInvalidToken(tokenMinusGrpo, allow);
140 return 0;
141 }
142 if (currentMarkup()) {
143 currentMarkup()->addDelim(Syntax::dMINUS);
144 currentMarkup()->addDelim(Syntax::dGRPO);
145 }
146 parm.type = Param::exclusions;
147 return parseElementNameGroup(declInputLevel, parm);
148 case tokenPero:
149 parm.type = Param::pero;
150 if (currentMarkup())
151 currentMarkup()->addDelim(Syntax::dPERO);
152 return 1;
153 case tokenPeroGrpo:
154 if (!inInstance())
155 message(ParserMessages::peroGrpoProlog);
156 // fall through
157 case tokenPeroNameStart:
158 {
159 if (inInstance()) {
160 if (options().warnInstanceParamEntityRef)
161 message(ParserMessages::instanceParamEntityRef);
162 }
163 else {
164 if (options().warnInternalSubsetPsParamEntityRef && inputLevel() == 1)
165 message(ParserMessages::internalSubsetPsParamEntityRef);
166 }
167 ConstPtr<Entity> entity;
168 Ptr<EntityOrigin> origin;
169 if (!parseEntityReference(1, token == tokenPeroGrpo, entity, origin))
170 return 0;
171 if (!entity.isNull())
172 entity->declReference(*this, origin);
173 }
174 break;
175 case tokenPlusGrpo:
176 if (!allow.inclusions()) {
177 paramInvalidToken(tokenPlusGrpo, allow);
178 return 0;
179 }
180 if (currentMarkup()) {
181 currentMarkup()->addDelim(Syntax::dPLUS);
182 currentMarkup()->addDelim(Syntax::dGRPO);
183 }
184 parm.type = Param::inclusions;
185 return parseElementNameGroup(declInputLevel, parm);
186 case tokenRni:
187 if (!allow.rni()) {
188 paramInvalidToken(tokenRni, allow);
189 return 0;
190 }
191 return parseIndicatedReservedName(allow, parm);
192 case tokenS:
193 if (currentMarkup())
194 currentMarkup()->addS(currentChar());
195 break;
196 case tokenNameStart:
197 switch (allow.nameStart()) {
198 case Param::invalid:
199 paramInvalidToken(tokenNameStart, allow);
200 return 0;
201 case Param::reservedName:
202 return parseReservedName(allow, parm);
203 case Param::name:
204 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
205 parm.type = Param::name;
206 getCurrentToken(syntax().generalSubstTable(), parm.token);
207 if (currentMarkup())
208 currentMarkup()->addName(currentInput());
209 return 1;
210 case Param::entityName:
211 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
212 parm.type = Param::entityName;
213 getCurrentToken(syntax().entitySubstTable(), parm.token);
214 if (currentMarkup())
215 currentMarkup()->addName(currentInput());
216 return 1;
217 case Param::paramEntityName:
218 extendNameToken(syntax().penamelen(),
219 ParserMessages::parameterEntityNameLength);
220 parm.type = Param::paramEntityName;
221 getCurrentToken(syntax().entitySubstTable(), parm.token);
222 if (currentMarkup())
223 currentMarkup()->addName(currentInput());
224 return 1;
225 case Param::attributeValue:
226 return parseAttributeValueParam(parm);
227 }
228 break;
229 case tokenDigit:
230 switch (allow.digit()) {
231 case Param::invalid:
232 paramInvalidToken(tokenDigit, allow);
233 return 0;
234 case Param::number:
235 extendNumber(syntax().namelen(), ParserMessages::numberLength);
236 parm.type = Param::number;
237 getCurrentToken(parm.token);
238 if (currentMarkup())
239 currentMarkup()->addNumber(currentInput());
240 return 1;
241 case Param::attributeValue:
242 return parseAttributeValueParam(parm);
243 }
244 break;
245 case tokenLcUcNmchar:
246 switch (allow.nmchar()) {
247 case Param::invalid:
248 paramInvalidToken(tokenLcUcNmchar, allow);
249 return 0;
250 case Param::attributeValue:
251 return parseAttributeValueParam(parm);
252 }
253 break;
254 default:
255 CANNOT_HAPPEN();
256 }
257 }
258 }
259
paramInvalidToken(Token token,const AllowedParams & allow)260 void Parser::paramInvalidToken(Token token, const AllowedParams &allow)
261 {
262 message(ParserMessages::paramInvalidToken,
263 TokenMessageArg(token, allow.mainMode(),
264 syntaxPointer(), sdPointer()),
265 AllowedParamsMessageArg(allow, syntaxPointer()));
266 }
267
parseGroupToken(const AllowedGroupTokens & allow,unsigned nestingLevel,unsigned declInputLevel,unsigned groupInputLevel,GroupToken & gt)268 Boolean Parser::parseGroupToken(const AllowedGroupTokens &allow,
269 unsigned nestingLevel,
270 unsigned declInputLevel,
271 unsigned groupInputLevel,
272 GroupToken >)
273 {
274 for (;;) {
275 Token token = getToken(grpMode);
276 switch (token) {
277 case tokenEe:
278 if (inputLevel() <= groupInputLevel) {
279 message(ParserMessages::groupLevel);
280 if (inputLevel() <= declInputLevel)
281 return 0;
282 }
283 else if (!sd().www())
284 message(ParserMessages::groupEntityEnd);
285 if (currentMarkup())
286 currentMarkup()->addEntityEnd();
287 popInputStack();
288 break;
289 case tokenPeroGrpo:
290 if (!inInstance())
291 message(ParserMessages::peroGrpoProlog);
292 // fall through
293 case tokenPeroNameStart:
294 {
295 if (options().warnInternalSubsetTsParamEntityRef && inputLevel() == 1)
296 message(ParserMessages::internalSubsetTsParamEntityRef);
297 ConstPtr<Entity> entity;
298 Ptr<EntityOrigin> origin;
299 if (!parseEntityReference(1, token == tokenPeroGrpo, entity, origin))
300 return 0;
301 if (!entity.isNull())
302 entity->declReference(*this, origin);
303 }
304 break;
305 case tokenUnrecognized:
306 if (reportNonSgmlCharacter())
307 break;
308 {
309 message(ParserMessages::groupCharacter,
310 StringMessageArg(currentToken()),
311 AllowedGroupTokensMessageArg(allow, syntaxPointer()));
312 }
313 return 0;
314 case tokenDtgo:
315 if (!allow.groupToken(GroupToken::dataTagGroup)) {
316 groupTokenInvalidToken(tokenDtgo, allow);
317 return 0;
318 }
319 if (sd().datatag())
320 message(ParserMessages::datatagNotImplemented);
321 if (currentMarkup())
322 currentMarkup()->addDelim(Syntax::dDTGO);
323 return parseDataTagGroup(nestingLevel + 1, declInputLevel, gt);
324 case tokenGrpo:
325 if (currentMarkup())
326 currentMarkup()->addDelim(Syntax::dGRPO);
327 switch (allow.group()) {
328 case GroupToken::modelGroup:
329 {
330 ModelGroup *modelGroup;
331 if (!parseModelGroup(nestingLevel + 1, declInputLevel, modelGroup,
332 grpMode))
333 return 0;
334 gt.model = modelGroup;
335 gt.type = GroupToken::modelGroup;
336 return 1;
337 }
338 case GroupToken::dataTagTemplateGroup:
339 return parseDataTagTemplateGroup(nestingLevel + 1, declInputLevel, gt);
340 default:
341 groupTokenInvalidToken(tokenGrpo, allow);
342 return 0;
343 }
344 break;
345 case tokenRni:
346 if (!allow.groupToken(GroupToken::pcdata)) {
347 groupTokenInvalidToken(tokenRni, allow);
348 return 0;
349 }
350 Syntax::ReservedName rn;
351 if (!getIndicatedReservedName(&rn))
352 return 0;
353 if (rn != Syntax::rPCDATA) {
354 StringC token(syntax().delimGeneral(Syntax::dRNI));
355 token += syntax().reservedName(Syntax::rPCDATA);
356 message(ParserMessages::invalidToken, StringMessageArg(token));
357 return 0;
358 }
359 gt.type = GroupToken::pcdata;
360 gt.contentToken = new PcdataToken;
361 return 1;
362 case tokenS:
363 if (currentMarkup()) {
364 extendS();
365 currentMarkup()->addS(currentInput());
366 }
367 break;
368 case tokenNameStart:
369 switch (allow.nameStart()) {
370 case GroupToken::elementToken:
371 {
372 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
373 gt.type = GroupToken::elementToken;
374 StringC &buffer = nameBuffer();
375 getCurrentToken(syntax().generalSubstTable(), buffer);
376 if (currentMarkup())
377 currentMarkup()->addName(currentInput());
378 const ElementType *e = lookupCreateElement(buffer);
379 ContentToken::OccurrenceIndicator oi
380 = getOccurrenceIndicator(grpMode);
381 gt.contentToken = new ElementToken(e, oi);
382 return 1;
383 }
384 case GroupToken::name:
385 case GroupToken::nameToken:
386 extendNameToken(syntax().namelen(),
387 token == GroupToken::name
388 ? ParserMessages::nameLength
389 : ParserMessages::nameTokenLength);
390 getCurrentToken(syntax().generalSubstTable(), gt.token);
391 gt.type = allow.nameStart();
392 if (currentMarkup()) {
393 if (gt.type == GroupToken::nameToken)
394 currentMarkup()->addNameToken(currentInput());
395 else
396 currentMarkup()->addName(currentInput());
397 }
398 return 1;
399 default:
400 groupTokenInvalidToken(tokenNameStart, allow);
401 return 0;
402 }
403 case tokenDigit:
404 case tokenLcUcNmchar:
405 if (!allow.groupToken(GroupToken::nameToken)) {
406 groupTokenInvalidToken(token, allow);
407 return 0;
408 }
409 extendNameToken(syntax().namelen(), ParserMessages::nameTokenLength);
410 getCurrentToken(syntax().generalSubstTable(), gt.token);
411 gt.type = GroupToken::nameToken;
412 if (currentMarkup())
413 currentMarkup()->addNameToken(currentInput());
414 return 1;
415 case tokenLit:
416 case tokenLita:
417 // parameter literal in data tag pattern
418 if (!allow.groupToken(GroupToken::dataTagLiteral)) {
419 groupTokenInvalidToken(token, allow);
420 return 0;
421 }
422 if (!parseDataTagParameterLiteral(token == tokenLita, gt.text))
423 return 0;
424 gt.type = GroupToken::dataTagLiteral;
425 if (currentMarkup())
426 currentMarkup()->addLiteral(gt.text);
427 return 1;
428 case tokenAnd:
429 case tokenSeq:
430 case tokenOr:
431 case tokenDtgc:
432 case tokenGrpc:
433 case tokenOpt:
434 case tokenPlus:
435 case tokenRep:
436 groupTokenInvalidToken(token, allow);
437 return 0;
438 }
439 }
440 }
441
442
groupTokenInvalidToken(Token token,const AllowedGroupTokens & allow)443 void Parser::groupTokenInvalidToken(Token token, const AllowedGroupTokens &allow)
444 {
445 message(ParserMessages::groupTokenInvalidToken,
446 TokenMessageArg(token, grpMode, syntaxPointer(), sdPointer()),
447 AllowedGroupTokensMessageArg(allow, syntaxPointer()));
448 }
449
450
parseGroupConnector(const AllowedGroupConnectors & allow,unsigned declInputLevel,unsigned groupInputLevel,GroupConnector & gc)451 Boolean Parser::parseGroupConnector(const AllowedGroupConnectors &allow,
452 unsigned declInputLevel,
453 unsigned groupInputLevel,
454 GroupConnector &gc)
455 {
456 for (;;) {
457 Token token = getToken(grpMode);
458 switch (token) {
459 case tokenEe:
460 if (inputLevel() <= groupInputLevel) {
461 message(ParserMessages::groupLevel);
462 if (inputLevel() <= declInputLevel)
463 return 0;
464 }
465 if (currentMarkup())
466 currentMarkup()->addEntityEnd();
467 popInputStack();
468 break;
469 case tokenS:
470 if (currentMarkup()) {
471 extendS();
472 currentMarkup()->addS(currentInput());
473 }
474 break;
475 case tokenPeroGrpo:
476 if (inInstance()) {
477 message(ParserMessages::peroGrpoProlog);
478 break;
479 }
480 // fall through
481 case tokenPeroNameStart:
482 if (!sd().www())
483 message(ParserMessages::groupEntityReference);
484 else {
485 ConstPtr<Entity> entity;
486 Ptr<EntityOrigin> origin;
487 if (!parseEntityReference(1, token == tokenPeroGrpo, entity, origin))
488 return 0;
489 if (!entity.isNull())
490 entity->declReference(*this, origin);
491 }
492 break;
493 case tokenUnrecognized:
494 if (reportNonSgmlCharacter())
495 break;
496 {
497 message(ParserMessages::groupCharacter,
498 StringMessageArg(currentToken()),
499 AllowedGroupConnectorsMessageArg(allow, syntaxPointer()));
500 }
501 return 0;
502 case tokenAnd:
503 if (!allow.groupConnector(GroupConnector::andGC)) {
504 groupConnectorInvalidToken(tokenAnd, allow);
505 return 0;
506 }
507 gc.type = GroupConnector::andGC;
508 if (currentMarkup())
509 currentMarkup()->addDelim(Syntax::dAND);
510 return 1;
511 case tokenSeq:
512 if (!allow.groupConnector(GroupConnector::seqGC)) {
513 groupConnectorInvalidToken(tokenSeq, allow);
514 return 0;
515 }
516 gc.type = GroupConnector::seqGC;
517 if (currentMarkup())
518 currentMarkup()->addDelim(Syntax::dSEQ);
519 return 1;
520 case tokenOr:
521 if (!allow.groupConnector(GroupConnector::orGC)) {
522 groupConnectorInvalidToken(tokenOr, allow);
523 return 0;
524 }
525 gc.type = GroupConnector::orGC;
526 if (currentMarkup())
527 currentMarkup()->addDelim(Syntax::dOR);
528 return 1;
529 case tokenDtgc:
530 if (!allow.groupConnector(GroupConnector::dtgcGC)) {
531 groupConnectorInvalidToken(tokenDtgc, allow);
532 return 0;
533 }
534 gc.type = GroupConnector::dtgcGC;
535 if (inputLevel() > groupInputLevel)
536 message(ParserMessages::groupParameterEntityNotEnded);
537 if (currentMarkup())
538 currentMarkup()->addDelim(Syntax::dDTGC);
539 return 1;
540 case tokenGrpc:
541 if (!allow.groupConnector(GroupConnector::grpcGC)) {
542 groupConnectorInvalidToken(tokenGrpc, allow);
543 return 0;
544 }
545 gc.type = GroupConnector::grpcGC;
546 if (inputLevel() > groupInputLevel)
547 message(ParserMessages::groupParameterEntityNotEnded);
548 if (currentMarkup())
549 currentMarkup()->addDelim(Syntax::dGRPC);
550 return 1;
551 default:
552 groupConnectorInvalidToken(token, allow);
553 return 0;
554 }
555 }
556 }
557
groupConnectorInvalidToken(Token token,const AllowedGroupConnectors & allow)558 void Parser::groupConnectorInvalidToken(Token token,
559 const AllowedGroupConnectors &allow)
560 {
561 message(ParserMessages::connectorInvalidToken,
562 TokenMessageArg(token, grpMode, syntaxPointer(), sdPointer()),
563 AllowedGroupConnectorsMessageArg(allow, syntaxPointer()));
564 }
565
parseElementNameGroup(unsigned declInputLevel,Param & parm)566 Boolean Parser::parseElementNameGroup(unsigned declInputLevel, Param &parm)
567 {
568 if (!parseNameGroup(declInputLevel, parm))
569 return 0;
570 parm.elementVector.resize(parm.nameTokenVector.size());
571 for (size_t i = 0; i < parm.nameTokenVector.size(); i++)
572 parm.elementVector[i] = lookupCreateElement(parm.nameTokenVector[i].name);
573 return 1;
574 }
575
parseEntityReferenceNameGroup(Boolean & ignore)576 Boolean Parser::parseEntityReferenceNameGroup(Boolean &ignore)
577 {
578 Param parm;
579 if (!parseNameGroup(inputLevel(), parm))
580 return 0;
581 if (inInstance()) {
582 for (size_t i = 0; i < parm.nameTokenVector.size(); i++) {
583 const Lpd *lpd = lookupLpd(parm.nameTokenVector[i].name).pointer();
584 if (lpd && lpd->active()) {
585 ignore = 0;
586 return 1;
587 }
588 }
589 }
590 ignore = 1;
591 return 1;
592 }
593
parseTagNameGroup(Boolean & active)594 Boolean Parser::parseTagNameGroup(Boolean &active)
595 {
596 Param parm;
597 if (!parseNameGroup(inputLevel(), parm))
598 return 0;
599 active = 0;
600 return 1;
601 }
602
parseNameGroup(unsigned declInputLevel,Param & parm)603 Boolean Parser::parseNameGroup(unsigned declInputLevel, Param &parm)
604 {
605 static AllowedGroupTokens allowName(GroupToken::name);
606 return parseGroup(allowName, declInputLevel, parm);
607 }
608
parseNameTokenGroup(unsigned declInputLevel,Param & parm)609 Boolean Parser::parseNameTokenGroup(unsigned declInputLevel, Param &parm)
610 {
611 static AllowedGroupTokens allowNameToken(GroupToken::nameToken);
612 return parseGroup(allowNameToken, declInputLevel, parm);
613 }
614
615 static
groupContains(const Vector<NameToken> & vec,const StringC & str)616 Boolean groupContains(const Vector<NameToken> &vec, const StringC &str)
617 {
618 for (size_t i = 0; i < vec.size(); i++)
619 if (vec[i].name == str)
620 return 1;
621 return 0;
622 }
623
parseGroup(const AllowedGroupTokens & allowToken,unsigned declInputLevel,Param & parm)624 Boolean Parser::parseGroup(const AllowedGroupTokens &allowToken,
625 unsigned declInputLevel,
626 Param &parm)
627 {
628 unsigned groupInputLevel = inputLevel();
629 int nDuplicates = 0;
630 Vector<NameToken> &vec = parm.nameTokenVector;
631 vec.clear();
632 GroupConnector::Type connector = GroupConnector::grpcGC;
633 GroupToken gt;
634 for (;;) {
635 if (!parseGroupToken(allowToken, 0, declInputLevel, groupInputLevel, gt))
636 return 0;
637 if (groupContains(vec, gt.token)) {
638 nDuplicates++;
639 message(ParserMessages::duplicateGroupToken,
640 StringMessageArg(gt.token));
641 }
642 else {
643 vec.resize(vec.size() + 1);
644 gt.token.swap(vec.back().name);
645 getCurrentToken(vec.back().origName);
646 vec.back().loc = currentLocation();
647 }
648 GroupConnector gc;
649 static AllowedGroupConnectors allowAnyConnectorGrpc(GroupConnector::orGC,
650 GroupConnector::andGC,
651 GroupConnector::seqGC,
652 GroupConnector::grpcGC);
653
654 if (!parseGroupConnector(allowAnyConnectorGrpc, declInputLevel,
655 groupInputLevel, gc))
656 return 0;
657 if (gc.type == GroupConnector::grpcGC)
658 break;
659 if (options().warnNameGroupNotOr) {
660 if (gc.type != GroupConnector::orGC)
661 message(ParserMessages::nameGroupNotOr);
662 }
663 else if (options().warnShould) {
664 if (connector == GroupConnector::grpcGC)
665 connector = gc.type;
666 else if (gc.type != connector) {
667 message(ParserMessages::mixedConnectors);
668 connector = gc.type;
669 }
670 }
671 }
672 if (nDuplicates + vec.size() > syntax().grpcnt())
673 message(ParserMessages::groupCount, NumberMessageArg(syntax().grpcnt()));
674 return 1;
675 }
676
parseDataTagGroup(unsigned nestingLevel,unsigned declInputLevel,GroupToken & result)677 Boolean Parser::parseDataTagGroup(unsigned nestingLevel,
678 unsigned declInputLevel, GroupToken &result)
679 {
680 if (nestingLevel - 1 == syntax().grplvl())
681 message(ParserMessages::grplvl, NumberMessageArg(syntax().grplvl()));
682 unsigned groupInputLevel = inputLevel();
683 GroupToken gt;
684 static AllowedGroupTokens allowName(GroupToken::name);
685 if (!parseGroupToken(allowName, nestingLevel, declInputLevel,
686 groupInputLevel, gt))
687 return 0;
688 const ElementType *element = lookupCreateElement(gt.token);
689 GroupConnector gc;
690 static AllowedGroupConnectors allowSeq(GroupConnector::seqGC);
691 if (!parseGroupConnector(allowSeq, declInputLevel, groupInputLevel, gc))
692 return 0;
693 static AllowedGroupTokens
694 allowDataTagLiteralDataTagTemplateGroup(GroupToken::dataTagLiteral,
695 GroupToken::dataTagTemplateGroup);
696 if (!parseGroupToken(allowDataTagLiteralDataTagTemplateGroup,
697 nestingLevel,
698 declInputLevel,
699 groupInputLevel,
700 gt))
701 return 0;
702 Vector<Text> templates;
703 if (gt.type == GroupToken::dataTagTemplateGroup)
704 gt.textVector.swap(templates);
705 else {
706 templates.resize(1);
707 gt.text.swap(templates[0]);
708 }
709 static AllowedGroupConnectors allowSeqDtgc(GroupConnector::seqGC,
710 GroupConnector::dtgcGC);
711 if (!parseGroupConnector(allowSeqDtgc, declInputLevel, groupInputLevel, gc))
712 return 0;
713 NCVector<Owner<ContentToken> > vec(2);
714 vec[1] = new PcdataToken;
715 if (gc.type != GroupConnector::dtgcGC) {
716 static AllowedGroupTokens allowDataTagLiteral(GroupToken::dataTagLiteral);
717 if (!parseGroupToken(allowDataTagLiteral,
718 nestingLevel,
719 declInputLevel,
720 groupInputLevel,
721 gt))
722 return 0;
723 vec[0] = new DataTagElementToken(element, templates, gt.text);
724 static AllowedGroupConnectors allowDtgc(GroupConnector::dtgcGC);
725 if (!parseGroupConnector(allowDtgc, declInputLevel, groupInputLevel, gc))
726 return 0;
727 }
728 else
729 vec[0] = new DataTagElementToken(element, templates);
730 ContentToken::OccurrenceIndicator oi = getOccurrenceIndicator(grpMode);
731 result.contentToken = new DataTagGroup(vec, oi);
732 result.type = GroupToken::dataTagGroup;
733 return 1;
734 }
735
parseDataTagTemplateGroup(unsigned nestingLevel,unsigned declInputLevel,GroupToken & result)736 Boolean Parser::parseDataTagTemplateGroup(unsigned nestingLevel,
737 unsigned declInputLevel,
738 GroupToken &result)
739 {
740 if (nestingLevel - 1 == syntax().grplvl())
741 message(ParserMessages::grplvl, NumberMessageArg(syntax().grplvl()));
742 unsigned groupInputLevel = inputLevel();
743 Vector<Text> &vec = result.textVector;
744 for (;;) {
745 GroupToken gt;
746 static AllowedGroupTokens allowDataTagLiteral(GroupToken::dataTagLiteral);
747 if (!parseGroupToken(allowDataTagLiteral,
748 nestingLevel,
749 declInputLevel,
750 groupInputLevel,
751 gt))
752 return 0;
753 if (vec.size() == syntax().grpcnt())
754 message(ParserMessages::groupCount, NumberMessageArg(syntax().grpcnt()));
755 vec.resize(vec.size() + 1);
756 gt.text.swap(vec.back());
757 static AllowedGroupConnectors allowOrGrpc(GroupConnector::orGC,
758 GroupConnector::grpcGC);
759 GroupConnector gc;
760 if (!parseGroupConnector(allowOrGrpc, declInputLevel, groupInputLevel, gc))
761 return 0;
762 if (gc.type == GroupConnector::grpcGC)
763 break;
764 }
765 return 1;
766 }
767
parseModelGroup(unsigned nestingLevel,unsigned declInputLevel,ModelGroup * & group,Mode oiMode)768 Boolean Parser::parseModelGroup(unsigned nestingLevel, unsigned declInputLevel,
769 ModelGroup *&group, Mode oiMode)
770 {
771 if (nestingLevel - 1 == syntax().grplvl())
772 message(ParserMessages::grplvl, NumberMessageArg(syntax().grplvl()));
773 unsigned groupInputLevel = inputLevel();
774 GroupToken gt;
775 NCVector<Owner<ContentToken> > tokenVector;
776 GroupConnector::Type connector = GroupConnector::grpcGC;
777
778 static AllowedGroupTokens allowContentToken(GroupToken::pcdata,
779 GroupToken::dataTagGroup,
780 GroupToken::elementToken,
781 GroupToken::modelGroup);
782 static AllowedGroupConnectors allowAnyConnectorGrpc(GroupConnector::orGC,
783 GroupConnector::andGC,
784 GroupConnector::seqGC,
785 GroupConnector::grpcGC);
786
787 static AllowedGroupConnectors allowOrGrpc(GroupConnector::orGC,
788 GroupConnector::grpcGC);
789 static AllowedGroupConnectors allowAndGrpc(GroupConnector::andGC,
790 GroupConnector::grpcGC);
791 static AllowedGroupConnectors allowSeqGrpc(GroupConnector::seqGC,
792 GroupConnector::grpcGC);
793 const AllowedGroupConnectors *connectorp = &allowAnyConnectorGrpc;
794
795 GroupConnector gc;
796 Boolean pcdataCheck = 0;
797 do {
798 if (!parseGroupToken(allowContentToken, nestingLevel, declInputLevel,
799 groupInputLevel, gt))
800 return 0;
801 ContentToken *contentToken;
802 if (gt.type == GroupToken::modelGroup)
803 contentToken = gt.model.extract();
804 else
805 contentToken = gt.contentToken.extract();
806 if (tokenVector.size() == syntax().grpcnt())
807 message(ParserMessages::groupCount, NumberMessageArg(syntax().grpcnt()));
808 tokenVector.resize(tokenVector.size() + 1);
809 tokenVector.back() = contentToken;
810 if (!parseGroupConnector(*connectorp, declInputLevel, groupInputLevel, gc))
811 return 0;
812 if (options().warnMixedContentRepOrGroup && gt.type == GroupToken::pcdata) {
813 if (tokenVector.size() != 1)
814 message(ParserMessages::pcdataNotFirstInGroup);
815 else if (gc.type == GroupConnector::seqGC)
816 message(ParserMessages::pcdataInSeqGroup);
817 else
818 pcdataCheck = 1;
819 if (nestingLevel != 1)
820 message(ParserMessages::pcdataInNestedModelGroup);
821 }
822 else if (pcdataCheck) {
823 if (gt.type == GroupToken::modelGroup)
824 message(ParserMessages::pcdataGroupMemberModelGroup);
825 if (contentToken->occurrenceIndicator() != ContentToken::none)
826 message(ParserMessages::pcdataGroupMemberOccurrenceIndicator);
827 }
828 if (tokenVector.size() == 1) {
829 connector = gc.type;
830 switch (gc.type) {
831 case GroupConnector::orGC:
832 connectorp = &allowOrGrpc;
833 break;
834 case GroupConnector::seqGC:
835 connectorp = &allowSeqGrpc;
836 break;
837 case GroupConnector::andGC:
838 connectorp = &allowAndGrpc;
839 if (options().warnAndGroup)
840 message(ParserMessages::andGroup);
841 break;
842 default:
843 break;
844 }
845 }
846 } while (gc.type != GroupConnector::grpcGC);
847 ContentToken::OccurrenceIndicator oi
848 = getOccurrenceIndicator(oiMode);
849 switch (connector) {
850 case GroupConnector::orGC:
851 group = new OrModelGroup(tokenVector, oi);
852 if (pcdataCheck && oi != ContentToken::rep)
853 message(ParserMessages::pcdataGroupNotRep);
854 break;
855 case GroupConnector::grpcGC:
856 if (pcdataCheck && oi != ContentToken::rep && oi != ContentToken::none)
857 message(ParserMessages::pcdataGroupNotRep);
858 // fall through
859 case GroupConnector::seqGC:
860 group = new SeqModelGroup(tokenVector, oi);
861 break;
862 case GroupConnector::andGC:
863 group = new AndModelGroup(tokenVector, oi);
864 break;
865 default:
866 break;
867 }
868 return 1;
869 }
870
871 ContentToken::OccurrenceIndicator
getOccurrenceIndicator(Mode oiMode)872 Parser::getOccurrenceIndicator(Mode oiMode)
873 {
874 Token token = getToken(oiMode);
875 switch (token) {
876 case tokenPlus:
877 if (currentMarkup())
878 currentMarkup()->addDelim(Syntax::dPLUS);
879 return ContentToken::plus;
880 case tokenOpt:
881 if (currentMarkup())
882 currentMarkup()->addDelim(Syntax::dOPT);
883 return ContentToken::opt;
884 case tokenRep:
885 if (currentMarkup())
886 currentMarkup()->addDelim(Syntax::dREP);
887 return ContentToken::rep;
888 default:
889 currentInput()->ungetToken();
890 return ContentToken::none;
891 }
892 }
893
parseMinimumLiteral(Boolean lita,Text & text)894 Boolean Parser::parseMinimumLiteral(Boolean lita, Text &text)
895 {
896 return parseLiteral(lita ? mlitaMode : mlitMode, mlitMode,
897 Syntax::referenceQuantity(Syntax::qLITLEN),
898 ParserMessages::minimumLiteralLength,
899 literalSingleSpace|literalMinimumData
900 |(eventsWanted().wantPrologMarkup()
901 ? literalDelimInfo
902 : 0),
903 text);
904 }
905
parseSystemIdentifier(Boolean lita,Text & text)906 Boolean Parser::parseSystemIdentifier(Boolean lita, Text &text)
907 {
908 return parseLiteral(lita ? slitaMode : slitMode, slitMode, syntax().litlen(),
909 ParserMessages::systemIdentifierLength,
910 (eventsWanted().wantPrologMarkup()
911 ? literalDelimInfo
912 : 0), text);
913 }
914
parseParameterLiteral(Boolean lita,Text & text)915 Boolean Parser::parseParameterLiteral(Boolean lita, Text &text)
916 {
917 return parseLiteral(lita ? plitaMode : plitMode, pliteMode, syntax().litlen(),
918 ParserMessages::parameterLiteralLength,
919 (eventsWanted().wantPrologMarkup()
920 ? literalDelimInfo
921 : 0),
922 text);
923 }
924
parseDataTagParameterLiteral(Boolean lita,Text & text)925 Boolean Parser::parseDataTagParameterLiteral(Boolean lita, Text &text)
926 {
927 return parseLiteral(lita ? plitaMode : plitMode, pliteMode,
928 syntax().dtemplen(),
929 ParserMessages::dataTagPatternLiteralLength,
930 literalDataTag
931 | (eventsWanted().wantPrologMarkup()
932 ? literalDelimInfo
933 : 0),
934 text);
935 }
936
parseIndicatedReservedName(const AllowedParams & allow,Param & parm)937 Boolean Parser::parseIndicatedReservedName(const AllowedParams &allow,
938 Param &parm)
939 {
940 Syntax::ReservedName rn;
941 if (!getIndicatedReservedName(&rn))
942 return 0;
943 if (!allow.reservedName(rn)) {
944 message(ParserMessages::invalidReservedName,
945 StringMessageArg(currentToken()));
946 return 0;
947 }
948 parm.type = Param::indicatedReservedName + rn;
949 return 1;
950 }
951
parseReservedName(const AllowedParams & allow,Param & parm)952 Boolean Parser::parseReservedName(const AllowedParams &allow,
953 Param &parm)
954 {
955 Syntax::ReservedName rn;
956 if (!getReservedName(&rn))
957 return 0;
958 if (!allow.reservedName(rn)) {
959 message(ParserMessages::invalidReservedName,
960 StringMessageArg(syntax().reservedName(rn)));
961 return 0;
962 }
963 parm.type = Param::reservedName + rn;
964 return 1;
965 }
966
967
parseAttributeValueParam(Param & parm)968 Boolean Parser::parseAttributeValueParam(Param &parm)
969 {
970 extendNameToken(syntax().litlen() > syntax().normsep()
971 ? syntax().litlen() - syntax().normsep()
972 : 0,
973 ParserMessages::attributeValueLength);
974 parm.type = Param::attributeValue;
975 Text text;
976 text.addChars(currentInput()->currentTokenStart(),
977 currentInput()->currentTokenLength(),
978 currentLocation());
979 text.swap(parm.literalText);
980 if (currentMarkup())
981 currentMarkup()->addAttributeValue(currentInput());
982 return 1;
983 }
984
getIndicatedReservedName(Syntax::ReservedName * result)985 Boolean Parser::getIndicatedReservedName(Syntax::ReservedName *result)
986 {
987 if (currentMarkup())
988 currentMarkup()->addDelim(Syntax::dRNI);
989 InputSource *in = currentInput();
990 in->startToken();
991 if (!syntax().isNameStartCharacter(in->tokenChar(messenger()))) {
992 message(ParserMessages::rniNameStart);
993 return 0;
994 }
995 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
996 StringC &buffer = nameBuffer();
997 getCurrentToken(syntax().generalSubstTable(), buffer);
998 if (!syntax().lookupReservedName(buffer, result)) {
999 message(ParserMessages::noSuchReservedName, StringMessageArg(buffer));
1000 return 0;
1001 }
1002 if (currentMarkup())
1003 currentMarkup()->addReservedName(*result, currentInput());
1004 return 1;
1005 }
1006
getReservedName(Syntax::ReservedName * result)1007 Boolean Parser::getReservedName(Syntax::ReservedName *result)
1008 {
1009 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
1010 StringC &buffer = nameBuffer();
1011 getCurrentToken(syntax().generalSubstTable(), buffer);
1012 if (!syntax().lookupReservedName(buffer, result)) {
1013 message(ParserMessages::noSuchReservedName, StringMessageArg(buffer));
1014 return 0;
1015 }
1016 if (currentMarkup())
1017 currentMarkup()->addReservedName(*result, currentInput());
1018 return 1;
1019 }
1020
1021
1022 #ifdef SP_NAMESPACE
1023 }
1024 #endif
1025