Lines Matching +full:cs +full:- +full:out

3 /*-
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 RCHAR_T *end; /* end of string (-> NUL normally) */
70 # define NPAREN 10 /* we need to remember () 1-9 for back refs */
71 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
72 sopno pend[NPAREN]; /* -> ) ([0] unused) */
88 static void p_b_term(struct parse *p, cset *cs);
89 static void p_b_cclass(struct parse *p, cset *cs);
90 static void p_b_eclass(struct parse *p, cset *cs);
100 static void freeset(struct parse *p, cset *cs);
101 static int freezeset(struct parse *p, cset *cs);
102 static int firstch(struct parse *p, cset *cs);
103 static int nch(struct parse *p, cset *cs);
104 static void mcadd(struct parse *p, cset *cs, const char *cp);
106 static void mcsub(cset *cs, char *cp);
107 static int mcin(cset *cs, char *cp);
108 static char *mcfind(cset *cs, char *cp);
110 static void mcinvert(struct parse *p, cset *cs);
111 static void mccase(struct parse *p, cset *cs);
137 #define PEEK() (*p->next)
138 #define PEEK2() (*(p->next+1))
139 #define MORE() (p->next < p->end)
140 #define MORE2() (p->next+1 < p->end)
145 #define NEXT() (p->next++)
146 #define NEXT2() (p->next += 2)
147 #define NEXTn(n) (p->next += (n))
148 #define GETNEXT() (*p->next++)
155 #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
156 #define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
157 #define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
158 #define HERE() (p->slen)
159 #define THERE() (p->slen - 1)
160 #define THERETHERE() (p->slen - 2)
161 #define DROP(n) (p->slen -= (n))
171 ((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \
172 (p)->ncsalloc * sizeof(cset) + \
173 (p)->ssize * sizeof(sop))
177 - regcomp - interface for parser and compilation
198 if (preg->re_endp < pattern) in regcomp()
200 len = preg->re_endp - pattern; in regcomp()
206 (NC-1)*sizeof(cat_t)); in regcomp()
209 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ in regcomp()
210 p->strip = (sop *)malloc(p->ssize * sizeof(sop)); in regcomp()
211 if (p->strip == NULL) { in regcomp()
215 p->stripdata = (RCHAR_T *)malloc(p->ssize * sizeof(RCHAR_T)); in regcomp()
216 if (p->stripdata == NULL) { in regcomp()
217 free((char *)p->strip); in regcomp()
221 p->slen = 0; in regcomp()
224 p->g = g; in regcomp()
225 p->next = (RCHAR_T *)pattern; /* convenience; we do not modify it */ in regcomp()
226 p->end = p->next + len; in regcomp()
227 p->error = 0; in regcomp()
228 p->ncsalloc = 0; in regcomp()
230 p->pbegin[i] = 0; in regcomp()
231 p->pend[i] = 0; in regcomp()
233 g->csetsize = NC; in regcomp()
234 g->sets = NULL; in regcomp()
235 g->setbits = NULL; in regcomp()
236 g->ncsets = 0; in regcomp()
237 g->cflags = cflags; in regcomp()
238 g->iflags = 0; in regcomp()
239 g->nbol = 0; in regcomp()
240 g->neol = 0; in regcomp()
241 g->must = NULL; in regcomp()
242 g->mlen = 0; in regcomp()
243 g->nsub = 0; in regcomp()
245 g->ncategories = 1; /* category 0 is "everything else" */ in regcomp()
246 g->categories = &g->catspace[-(CHAR_MIN)]; in regcomp()
247 memset((char *)g->catspace, 0, NC*sizeof(cat_t)); in regcomp()
249 g->backrefs = 0; in regcomp()
253 g->firststate = THERE(); in regcomp()
255 p_ere(p, OUT, 0); in regcomp()
259 p_bre(p, OUT, OUT, 0); in regcomp()
261 g->laststate = THERE(); in regcomp()
267 g->nplus = pluscount(p, g); in regcomp()
268 g->magic = MAGIC2; in regcomp()
269 preg->re_nsub = g->nsub; in regcomp()
270 preg->re_g = g; in regcomp()
271 preg->re_magic = MAGIC1; in regcomp()
274 if (g->iflags&BAD) in regcomp()
279 if (p->error != 0) /* lose */ in regcomp()
281 return(p->error); in regcomp()
285 - p_ere - ERE parser top level, concatenation and alternation
297 if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { in p_ere()
298 p->error = REG_ESPACE; in p_ere()
310 break; /* NOTE BREAK OUT */ in p_ere()
325 if (!first) { /* tail-end fixups */ in p_ere()
334 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
353 p->g->nsub++; in p_ere_exp()
354 subno = p->g->nsub; in p_ere_exp()
356 p->pbegin[subno] = HERE(); in p_ere_exp()
361 p->pend[subno] = HERE(); in p_ere_exp()
362 assert(p->pend[subno] != 0); in p_ere_exp()
372 * other 1003.2 regular-expression reviewers noticed it at in p_ere_exp()
381 p->g->iflags |= USEBOL; in p_ere_exp()
382 p->g->nbol++; in p_ere_exp()
387 p->g->iflags |= USEEOL; in p_ere_exp()
388 p->g->neol++; in p_ere_exp()
399 if (p->g->cflags&REG_NEWLINE) in p_ere_exp()
481 - p_str - string (no metacharacters) "parser"
492 - p_bre - BRE parser top level, anchoring and concatenation
493 * Giving end1 as OUT essentially eliminates the end1/end2 check.
511 if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { in p_bre()
512 p->error = REG_ESPACE; in p_bre()
520 p->g->iflags |= USEBOL; in p_bre()
521 p->g->nbol++; in p_bre()
530 p->g->iflags |= USEEOL; in p_bre()
531 p->g->neol++; in p_bre()
538 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
566 p->g->nsub++; in p_simp_re()
567 subno = p->g->nsub; in p_simp_re()
569 p->pbegin[subno] = HERE(); in p_simp_re()
575 p->pend[subno] = HERE(); in p_simp_re()
576 assert(p->pend[subno] != 0); in p_simp_re()
581 case ')': /* should not get here -- must be user */ in p_simp_re()
594 i = c - '0'; in p_simp_re()
596 if (p->pend[i] != 0) { in p_simp_re()
597 assert(i <= p->g->nsub); in p_simp_re()
599 assert(p->pbegin[i] != 0); in p_simp_re()
600 assert(p->strip[p->pbegin[i]] == OLPAREN); in p_simp_re()
601 assert(p->strip[p->pend[i]] == ORPAREN); in p_simp_re()
602 (void) dupl(p, p->pbegin[i]+1, p->pend[i]); in p_simp_re()
606 p->g->backrefs = 1; in p_simp_re()
615 if (p->g->cflags&REG_NEWLINE) in p_simp_re()
662 - p_count - parse a repetition count
671 count = count*10 + (GETNEXT() - '0'); in p_count()
680 - p_bracket - parse a bracketed character list
688 cset *cs; in p_bracket() local
693 cs = allocset(p); in p_bracket()
694 if (cs == NULL) in p_bracket()
697 /* Dept of Truly Sickening Special-Case Kludges */ in p_bracket()
698 if (p->next + 5 < p->end && MEMCMP(p->next, bow, 6) == 0) { in p_bracket()
703 if (p->next + 5 < p->end && MEMCMP(p->next, eow, 6) == 0) { in p_bracket()
712 CHadd(cs, ']'); in p_bracket()
713 else if (EAT('-')) in p_bracket()
714 CHadd(cs, '-'); in p_bracket()
715 while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) in p_bracket()
716 p_b_term(p, cs); in p_bracket()
717 if (EAT('-')) in p_bracket()
718 CHadd(cs, '-'); in p_bracket()
721 if (p->error != 0) /* don't mess things up further */ in p_bracket()
724 if (p->g->cflags&REG_ICASE) { in p_bracket()
728 for (i = p->g->csetsize - 1; i >= 0; i--) in p_bracket()
729 if (CHIN(cs, i) && isalpha(i)) { in p_bracket()
732 CHadd(cs, ci); in p_bracket()
734 if (cs->multis != NULL) in p_bracket()
735 mccase(p, cs); in p_bracket()
740 for (i = p->g->csetsize - 1; i >= 0; i--) in p_bracket()
741 if (CHIN(cs, i)) in p_bracket()
742 CHsub(cs, i); in p_bracket()
744 CHadd(cs, i); in p_bracket()
745 if (p->g->cflags&REG_NEWLINE) in p_bracket()
746 CHsub(cs, '\n'); in p_bracket()
747 if (cs->multis != NULL) in p_bracket()
748 mcinvert(p, cs); in p_bracket()
751 assert(cs->multis == NULL); /* xxx */ in p_bracket()
753 if (nch(p, cs) == 1) { /* optimize singleton sets */ in p_bracket()
754 ordinary(p, firstch(p, cs)); in p_bracket()
755 freeset(p, cs); in p_bracket()
757 EMIT(OANYOF, freezeset(p, cs)); in p_bracket()
761 - p_b_term - parse one term of a bracketed character list
764 p_b_term(struct parse *p, cset *cs) in p_b_term() argument
775 case '-': in p_b_term()
789 (void)REQUIRE(c != '-' && c != ']', REG_ECTYPE); in p_b_term()
790 p_b_cclass(p, cs); in p_b_term()
798 (void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE); in p_b_term()
799 p_b_eclass(p, cs); in p_b_term()
806 if (SEE('-') && MORE2() && PEEK2() != ']') { in p_b_term()
809 if (EAT('-')) in p_b_term()
810 finish = '-'; in p_b_term()
818 CHadd(cs, i); in p_b_term()
824 - p_b_cclass - parse a character-class name and deal with it
827 p_b_cclass(struct parse *p, cset *cs) in p_b_cclass() argument
829 RCHAR_T *sp = p->next; in p_b_cclass()
837 len = p->next - sp; in p_b_cclass()
838 for (cp = cclasses; cp->name != NULL; cp++) in p_b_cclass()
839 if (STRLEN(cp->name) == len && !MEMCMP(cp->name, sp, len)) in p_b_cclass()
841 if (cp->name == NULL) { in p_b_cclass()
847 u = cp->chars; in p_b_cclass()
849 CHadd(cs, c); in p_b_cclass()
850 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) in p_b_cclass()
851 MCadd(p, cs, u); in p_b_cclass()
855 - p_b_eclass - parse an equivalence-class name and deal with it
860 p_b_eclass(struct parse *p, cset *cs) in p_b_eclass() argument
865 CHadd(cs, c); in p_b_eclass()
869 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
887 - p_b_coll_elem - parse a collating-element name and look it up
894 RCHAR_T *sp = p->next; in p_b_coll_elem()
904 len = p->next - sp; in p_b_coll_elem()
905 for (cp = cnames; cp->name != NULL; cp++) in p_b_coll_elem()
906 if (STRLEN(cp->name) == len && MEMCMP(cp->name, sp, len)) in p_b_coll_elem()
907 return(cp->code); /* known name */ in p_b_coll_elem()
915 - othercase - return the case counterpart of an alphabetic
930 - bothcases - emit a dualcase version of a two-case character
937 RCHAR_T *oldnext = p->next; in bothcases()
938 RCHAR_T *oldend = p->end; in bothcases()
942 p->next = bracket; in bothcases()
943 p->end = bracket+2; in bothcases()
948 assert(p->next == bracket+2); in bothcases()
949 p->next = oldnext; in bothcases()
950 p->end = oldend; in bothcases()
954 - ordinary - emit an ordinary character
960 cat_t *cap = p->g->categories; in ordinary()
963 if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch) in ordinary()
969 cap[ch] = p->g->ncategories++; in ordinary()
975 - nonnewline - emit REG_NEWLINE version of OANY
982 RCHAR_T *oldnext = p->next; in nonnewline()
983 RCHAR_T *oldend = p->end; in nonnewline()
986 p->next = bracket; in nonnewline()
987 p->end = bracket+3; in nonnewline()
993 assert(p->next == bracket+3); in nonnewline()
994 p->next = oldnext; in nonnewline()
995 p->end = oldend; in nonnewline()
999 - repeat - generate code for a bounded repetition, recursively if needed
1016 p->error = REG_ESPACE; in repeat()
1017 if (p->error) in repeat()
1026 DROP(finish-start); /* drop the operand */ in repeat()
1043 case REP(1, N): /* as x?x{1,n-1} */ in repeat()
1053 repeat(p, copy, 1, to-1, reclimit); in repeat()
1059 case REP(N, N): /* as xx{m-1,n-1} */ in repeat()
1061 repeat(p, copy, from-1, to-1, reclimit); in repeat()
1063 case REP(N, INF): /* as xx{n-1,INF} */ in repeat()
1065 repeat(p, copy, from-1, to, reclimit); in repeat()
1074 - seterr - set an error condition
1079 if (p->error == 0) /* keep earliest error condition */ in seterr()
1080 p->error = e; in seterr()
1081 p->next = nuls; /* try to bring things to a halt */ in seterr()
1082 p->end = nuls; in seterr()
1083 return(0); /* make the return value well-defined */ in seterr()
1087 - allocset - allocate a set of characters for []
1092 int no = p->g->ncsets++; in allocset()
1095 cset *cs; in allocset() local
1096 size_t css = (size_t)p->g->csetsize; in allocset()
1099 if (no >= p->ncsalloc) { /* need another column of space */ in allocset()
1100 p->ncsalloc += CHAR_BIT; in allocset()
1101 nc = p->ncsalloc; in allocset()
1106 if (p->g->sets == NULL) in allocset()
1107 p->g->sets = (cset *)malloc(nc * sizeof(cset)); in allocset()
1109 p->g->sets = (cset *)realloc((char *)p->g->sets, in allocset()
1111 if (p->g->setbits == NULL) in allocset()
1112 p->g->setbits = (uch *)malloc(nbytes); in allocset()
1114 p->g->setbits = (uch *)realloc((char *)p->g->setbits, in allocset()
1118 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); in allocset()
1120 if (p->g->sets != NULL && p->g->setbits != NULL) in allocset()
1121 memset((char *)p->g->setbits + (nbytes - css), in allocset()
1132 cs = &p->g->sets[no]; in allocset()
1133 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); in allocset()
1134 cs->mask = 1 << ((no) % CHAR_BIT); in allocset()
1135 cs->hash = 0; in allocset()
1136 cs->smultis = 0; in allocset()
1137 cs->multis = NULL; in allocset()
1139 return(cs); in allocset()
1143 - freeset - free a now-unused set
1146 freeset(struct parse *p, cset *cs) in freeset() argument
1149 cset *top = &p->g->sets[p->g->ncsets]; in freeset()
1150 size_t css = (size_t)p->g->csetsize; in freeset()
1153 CHsub(cs, i); in freeset()
1154 if (cs == top-1) /* recover only the easy case */ in freeset()
1155 p->g->ncsets--; in freeset()
1159 - freezeset - final processing on a set of characters
1164 * is done using addition rather than xor -- all ASCII [aA] sets xor to
1168 freezeset(struct parse *p, cset *cs) in freezeset() argument
1170 uch h = cs->hash; in freezeset()
1172 cset *top = &p->g->sets[p->g->ncsets]; in freezeset()
1174 size_t css = (size_t)p->g->csetsize; in freezeset()
1177 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) in freezeset()
1178 if (cs2->hash == h && cs2 != cs) { in freezeset()
1181 if (!!CHIN(cs2, i) != !!CHIN(cs, i)) in freezeset()
1188 freeset(p, cs); in freezeset()
1189 cs = cs2; in freezeset()
1192 return((int)(cs - p->g->sets)); in freezeset()
1196 - firstch - return first character in a set (which must have at least one)
1199 firstch(struct parse *p, cset *cs) in firstch() argument
1202 size_t css = (size_t)p->g->csetsize; in firstch()
1205 if (CHIN(cs, i)) in firstch()
1212 - nch - number of characters in a set
1215 nch(struct parse *p, cset *cs) in nch() argument
1218 size_t css = (size_t)p->g->csetsize; in nch()
1222 if (CHIN(cs, i)) in nch()
1228 - mcadd - add a collating element to a cset
1231 mcadd(struct parse *p, cset *cs, const char *cp) in mcadd() argument
1233 size_t oldend = cs->smultis; in mcadd()
1236 cs->smultis += strlen(cp) + 1; in mcadd()
1237 np = realloc(cs->multis, cs->smultis); in mcadd()
1239 if (cs->multis) in mcadd()
1240 free(cs->multis); in mcadd()
1241 cs->multis = NULL; in mcadd()
1245 cs->multis = np; in mcadd()
1247 strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1); in mcadd()
1251 - mcinvert - invert the list of collating elements in a cset
1257 mcinvert(struct parse *p, cset *cs) in mcinvert() argument
1259 assert(cs->multis == NULL); /* xxx */ in mcinvert()
1263 - mccase - add case counterparts of the list of collating elements in a cset
1269 mccase(struct parse *p, cset *cs) in mccase() argument
1271 assert(cs->multis == NULL); /* xxx */ in mccase()
1276 - isinsets - is this character in any sets?
1283 int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; in isinsets()
1286 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) in isinsets()
1293 - samesets - are these two characters in exactly the same sets?
1300 int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; in samesets()
1304 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) in samesets()
1312 - categorize - sort out character categories
1318 cat_t *cats = g->categories; in categorize()
1324 if (p->error != 0) in categorize()
1329 cat = g->ncategories++; in categorize()
1339 - dupl - emit a duplicate of a bunch of sops
1347 sopno len = finish - start; in dupl()
1352 if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */ in dupl()
1354 assert(p->ssize >= p->slen + len); in dupl()
1355 (void) memcpy((char *)(p->strip + p->slen), in dupl()
1356 (char *)(p->strip + start), (size_t)len*sizeof(sop)); in dupl()
1357 (void) memcpy((char *)(p->stripdata + p->slen), in dupl()
1358 (char *)(p->stripdata + start), (size_t)len*sizeof(RCHAR_T)); in dupl()
1359 p->slen += len; in dupl()
1364 - doemit - emit a strip operator
1367 * hard-case backup, but it's just too big and messy unless there are
1374 if (p->error != 0) in doemit()
1381 if (p->slen >= p->ssize) in doemit()
1382 if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */ in doemit()
1386 p->strip[p->slen] = op; in doemit()
1387 p->stripdata[p->slen] = opnd; in doemit()
1388 p->slen++; in doemit()
1392 - doinsert - insert a sop into the strip
1403 if (p->error != 0) in doinsert()
1409 s = p->strip[sn]; in doinsert()
1410 d = p->stripdata[sn]; in doinsert()
1415 if (p->pbegin[i] >= pos) { in doinsert()
1416 p->pbegin[i]++; in doinsert()
1418 if (p->pend[i] >= pos) { in doinsert()
1419 p->pend[i]++; in doinsert()
1423 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], in doinsert()
1424 (HERE()-pos-1)*sizeof(sop)); in doinsert()
1425 memmove((char *)&p->stripdata[pos+1], (char *)&p->stripdata[pos], in doinsert()
1426 (HERE()-pos-1)*sizeof(RCHAR_T)); in doinsert()
1427 p->strip[pos] = s; in doinsert()
1428 p->stripdata[pos] = d; in doinsert()
1432 - dofwd - complete a forward reference
1438 if (p->error != 0) in dofwd()
1442 p->stripdata[pos] = value; in dofwd()
1446 - enlarge - enlarge the strip
1455 if (p->ssize >= size) in enlarge()
1458 osize = p->ssize; in enlarge()
1459 p->ssize = size; in enlarge()
1462 sp = realloc(p->strip, p->ssize * sizeof(sop)); in enlarge()
1465 p->strip = sp; in enlarge()
1466 dp = realloc(p->stripdata, p->ssize * sizeof(RCHAR_T)); in enlarge()
1469 p->ssize = osize; in enlarge()
1473 p->stripdata = dp; in enlarge()
1478 - stripsnug - compact the strip
1483 g->nstates = p->slen; in stripsnug()
1484 g->strip = (sop *)realloc((char *)p->strip, in stripsnug()
1485 p->slen * sizeof(sop)); in stripsnug()
1486 if (g->strip == NULL) { in stripsnug()
1488 g->strip = p->strip; in stripsnug()
1490 g->stripdata = (RCHAR_T *)realloc((char *)p->stripdata, in stripsnug()
1491 p->slen * sizeof(RCHAR_T)); in stripsnug()
1492 if (g->stripdata == NULL) { in stripsnug()
1494 g->stripdata = p->stripdata; in stripsnug()
1499 - findmust - fill in must and mlen with longest mandatory literal string
1523 if (p->error != 0) in findmust()
1528 scans = g->strip + 1; in findmust()
1529 scand = g->stripdata + 1; in findmust()
1536 newstarts = scans - 1; in findmust()
1537 newstartd = scand - 1; in findmust()
1547 scans--; in findmust()
1548 scand--; in findmust()
1556 g->iflags |= BAD; in findmust()
1562 if (newlen > g->mlen) { /* ends one */ in findmust()
1565 g->mlen = newlen; in findmust()
1572 if (g->mlen == 0) /* there isn't one */ in findmust()
1576 g->must = malloc(((size_t)g->mlen + 1) * sizeof(RCHAR_T)); in findmust()
1577 if (g->must == NULL) { /* argh; just forget it */ in findmust()
1578 g->mlen = 0; in findmust()
1581 cp = g->must; in findmust()
1584 for (i = g->mlen; i > 0; i--) { in findmust()
1591 assert(cp < g->must + g->mlen); in findmust()
1594 assert(cp == g->must + g->mlen); in findmust()
1599 - pluscount - count + nesting
1609 if (p->error != 0) in pluscount()
1612 scan = g->strip + 1; in pluscount()
1622 plusnest--; in pluscount()
1627 g->iflags |= BAD; in pluscount()