1*d9805213SSascha Wildner /* ccl - routines for character classes */
2*d9805213SSascha Wildner
3*d9805213SSascha Wildner /* Copyright (c) 1990 The Regents of the University of California. */
4*d9805213SSascha Wildner /* All rights reserved. */
5*d9805213SSascha Wildner
6*d9805213SSascha Wildner /* This code is derived from software contributed to Berkeley by */
7*d9805213SSascha Wildner /* Vern Paxson. */
8*d9805213SSascha Wildner
9*d9805213SSascha Wildner /* The United States Government has rights in this work pursuant */
10*d9805213SSascha Wildner /* to contract no. DE-AC03-76SF00098 between the United States */
11*d9805213SSascha Wildner /* Department of Energy and the University of California. */
12*d9805213SSascha Wildner
13*d9805213SSascha Wildner /* This file is part of flex. */
14*d9805213SSascha Wildner
15*d9805213SSascha Wildner /* Redistribution and use in source and binary forms, with or without */
16*d9805213SSascha Wildner /* modification, are permitted provided that the following conditions */
17*d9805213SSascha Wildner /* are met: */
18*d9805213SSascha Wildner
19*d9805213SSascha Wildner /* 1. Redistributions of source code must retain the above copyright */
20*d9805213SSascha Wildner /* notice, this list of conditions and the following disclaimer. */
21*d9805213SSascha Wildner /* 2. Redistributions in binary form must reproduce the above copyright */
22*d9805213SSascha Wildner /* notice, this list of conditions and the following disclaimer in the */
23*d9805213SSascha Wildner /* documentation and/or other materials provided with the distribution. */
24*d9805213SSascha Wildner
25*d9805213SSascha Wildner /* Neither the name of the University nor the names of its contributors */
26*d9805213SSascha Wildner /* may be used to endorse or promote products derived from this software */
27*d9805213SSascha Wildner /* without specific prior written permission. */
28*d9805213SSascha Wildner
29*d9805213SSascha Wildner /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30*d9805213SSascha Wildner /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31*d9805213SSascha Wildner /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32*d9805213SSascha Wildner /* PURPOSE. */
33*d9805213SSascha Wildner
34*d9805213SSascha Wildner #include "flexdef.h"
35*d9805213SSascha Wildner
36*d9805213SSascha Wildner /* return true if the chr is in the ccl. Takes negation into account. */
37*d9805213SSascha Wildner static bool
ccl_contains(const int cclp,const int ch)38*d9805213SSascha Wildner ccl_contains (const int cclp, const int ch)
39*d9805213SSascha Wildner {
40*d9805213SSascha Wildner int ind, len, i;
41*d9805213SSascha Wildner
42*d9805213SSascha Wildner len = ccllen[cclp];
43*d9805213SSascha Wildner ind = cclmap[cclp];
44*d9805213SSascha Wildner
45*d9805213SSascha Wildner for (i = 0; i < len; ++i)
46*d9805213SSascha Wildner if (ccltbl[ind + i] == ch)
47*d9805213SSascha Wildner return !cclng[cclp];
48*d9805213SSascha Wildner
49*d9805213SSascha Wildner return cclng[cclp];
50*d9805213SSascha Wildner }
51*d9805213SSascha Wildner
52*d9805213SSascha Wildner
53*d9805213SSascha Wildner /* ccladd - add a single character to a ccl */
54*d9805213SSascha Wildner
ccladd(int cclp,int ch)55*d9805213SSascha Wildner void ccladd (int cclp, int ch)
56*d9805213SSascha Wildner {
57*d9805213SSascha Wildner int ind, len, newpos, i;
58*d9805213SSascha Wildner
59*d9805213SSascha Wildner check_char (ch);
60*d9805213SSascha Wildner
61*d9805213SSascha Wildner len = ccllen[cclp];
62*d9805213SSascha Wildner ind = cclmap[cclp];
63*d9805213SSascha Wildner
64*d9805213SSascha Wildner /* check to see if the character is already in the ccl */
65*d9805213SSascha Wildner
66*d9805213SSascha Wildner for (i = 0; i < len; ++i)
67*d9805213SSascha Wildner if (ccltbl[ind + i] == ch)
68*d9805213SSascha Wildner return;
69*d9805213SSascha Wildner
70*d9805213SSascha Wildner /* mark newlines */
71*d9805213SSascha Wildner if (ch == nlch)
72*d9805213SSascha Wildner ccl_has_nl[cclp] = true;
73*d9805213SSascha Wildner
74*d9805213SSascha Wildner newpos = ind + len;
75*d9805213SSascha Wildner
76*d9805213SSascha Wildner if (newpos >= current_max_ccl_tbl_size) {
77*d9805213SSascha Wildner current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
78*d9805213SSascha Wildner
79*d9805213SSascha Wildner ++num_reallocs;
80*d9805213SSascha Wildner
81*d9805213SSascha Wildner ccltbl = reallocate_Character_array (ccltbl,
82*d9805213SSascha Wildner current_max_ccl_tbl_size);
83*d9805213SSascha Wildner }
84*d9805213SSascha Wildner
85*d9805213SSascha Wildner ccllen[cclp] = len + 1;
86*d9805213SSascha Wildner ccltbl[newpos] = (unsigned char) ch;
87*d9805213SSascha Wildner }
88*d9805213SSascha Wildner
89*d9805213SSascha Wildner /* dump_cclp - same thing as list_character_set, but for cclps. */
90*d9805213SSascha Wildner
dump_cclp(FILE * file,int cclp)91*d9805213SSascha Wildner static void dump_cclp (FILE* file, int cclp)
92*d9805213SSascha Wildner {
93*d9805213SSascha Wildner int i;
94*d9805213SSascha Wildner
95*d9805213SSascha Wildner putc ('[', file);
96*d9805213SSascha Wildner
97*d9805213SSascha Wildner for (i = 0; i < csize; ++i) {
98*d9805213SSascha Wildner if (ccl_contains(cclp, i)){
99*d9805213SSascha Wildner int start_char = i;
100*d9805213SSascha Wildner
101*d9805213SSascha Wildner putc (' ', file);
102*d9805213SSascha Wildner
103*d9805213SSascha Wildner fputs (readable_form (i), file);
104*d9805213SSascha Wildner
105*d9805213SSascha Wildner while (++i < csize && ccl_contains(cclp,i)) ;
106*d9805213SSascha Wildner
107*d9805213SSascha Wildner if (i - 1 > start_char)
108*d9805213SSascha Wildner /* this was a run */
109*d9805213SSascha Wildner fprintf (file, "-%s",
110*d9805213SSascha Wildner readable_form (i - 1));
111*d9805213SSascha Wildner
112*d9805213SSascha Wildner putc (' ', file);
113*d9805213SSascha Wildner }
114*d9805213SSascha Wildner }
115*d9805213SSascha Wildner
116*d9805213SSascha Wildner putc (']', file);
117*d9805213SSascha Wildner }
118*d9805213SSascha Wildner
119*d9805213SSascha Wildner
120*d9805213SSascha Wildner
121*d9805213SSascha Wildner /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
122*d9805213SSascha Wildner int
ccl_set_diff(int a,int b)123*d9805213SSascha Wildner ccl_set_diff (int a, int b)
124*d9805213SSascha Wildner {
125*d9805213SSascha Wildner int d, ch;
126*d9805213SSascha Wildner
127*d9805213SSascha Wildner /* create new class */
128*d9805213SSascha Wildner d = cclinit();
129*d9805213SSascha Wildner
130*d9805213SSascha Wildner /* In order to handle negation, we spin through all possible chars,
131*d9805213SSascha Wildner * addding each char in a that is not in b.
132*d9805213SSascha Wildner * (This could be O(n^2), but n is small and bounded.)
133*d9805213SSascha Wildner */
134*d9805213SSascha Wildner for ( ch = 0; ch < csize; ++ch )
135*d9805213SSascha Wildner if (ccl_contains (a, ch) && !ccl_contains(b, ch))
136*d9805213SSascha Wildner ccladd (d, ch);
137*d9805213SSascha Wildner
138*d9805213SSascha Wildner /* debug */
139*d9805213SSascha Wildner if (0){
140*d9805213SSascha Wildner fprintf(stderr, "ccl_set_diff (");
141*d9805213SSascha Wildner fprintf(stderr, "\n ");
142*d9805213SSascha Wildner dump_cclp (stderr, a);
143*d9805213SSascha Wildner fprintf(stderr, "\n ");
144*d9805213SSascha Wildner dump_cclp (stderr, b);
145*d9805213SSascha Wildner fprintf(stderr, "\n ");
146*d9805213SSascha Wildner dump_cclp (stderr, d);
147*d9805213SSascha Wildner fprintf(stderr, "\n)\n");
148*d9805213SSascha Wildner }
149*d9805213SSascha Wildner return d;
150*d9805213SSascha Wildner }
151*d9805213SSascha Wildner
152*d9805213SSascha Wildner /* ccl_set_union - create a new ccl as the set union of the two given ccls. */
153*d9805213SSascha Wildner int
ccl_set_union(int a,int b)154*d9805213SSascha Wildner ccl_set_union (int a, int b)
155*d9805213SSascha Wildner {
156*d9805213SSascha Wildner int d, i;
157*d9805213SSascha Wildner
158*d9805213SSascha Wildner /* create new class */
159*d9805213SSascha Wildner d = cclinit();
160*d9805213SSascha Wildner
161*d9805213SSascha Wildner /* Add all of a */
162*d9805213SSascha Wildner for (i = 0; i < ccllen[a]; ++i)
163*d9805213SSascha Wildner ccladd (d, ccltbl[cclmap[a] + i]);
164*d9805213SSascha Wildner
165*d9805213SSascha Wildner /* Add all of b */
166*d9805213SSascha Wildner for (i = 0; i < ccllen[b]; ++i)
167*d9805213SSascha Wildner ccladd (d, ccltbl[cclmap[b] + i]);
168*d9805213SSascha Wildner
169*d9805213SSascha Wildner /* debug */
170*d9805213SSascha Wildner if (0){
171*d9805213SSascha Wildner fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
172*d9805213SSascha Wildner fprintf(stderr, "\n ");
173*d9805213SSascha Wildner dump_cclp (stderr, a);
174*d9805213SSascha Wildner fprintf(stderr, "\n ");
175*d9805213SSascha Wildner dump_cclp (stderr, b);
176*d9805213SSascha Wildner fprintf(stderr, "\n ");
177*d9805213SSascha Wildner dump_cclp (stderr, d);
178*d9805213SSascha Wildner fprintf(stderr, "\n)\n");
179*d9805213SSascha Wildner }
180*d9805213SSascha Wildner return d;
181*d9805213SSascha Wildner }
182*d9805213SSascha Wildner
183*d9805213SSascha Wildner
184*d9805213SSascha Wildner /* cclinit - return an empty ccl */
185*d9805213SSascha Wildner
cclinit(void)186*d9805213SSascha Wildner int cclinit (void)
187*d9805213SSascha Wildner {
188*d9805213SSascha Wildner if (++lastccl >= current_maxccls) {
189*d9805213SSascha Wildner current_maxccls += MAX_CCLS_INCREMENT;
190*d9805213SSascha Wildner
191*d9805213SSascha Wildner ++num_reallocs;
192*d9805213SSascha Wildner
193*d9805213SSascha Wildner cclmap =
194*d9805213SSascha Wildner reallocate_integer_array (cclmap, current_maxccls);
195*d9805213SSascha Wildner ccllen =
196*d9805213SSascha Wildner reallocate_integer_array (ccllen, current_maxccls);
197*d9805213SSascha Wildner cclng = reallocate_integer_array (cclng, current_maxccls);
198*d9805213SSascha Wildner ccl_has_nl =
199*d9805213SSascha Wildner reallocate_bool_array (ccl_has_nl,
200*d9805213SSascha Wildner current_maxccls);
201*d9805213SSascha Wildner }
202*d9805213SSascha Wildner
203*d9805213SSascha Wildner if (lastccl == 1)
204*d9805213SSascha Wildner /* we're making the first ccl */
205*d9805213SSascha Wildner cclmap[lastccl] = 0;
206*d9805213SSascha Wildner
207*d9805213SSascha Wildner else
208*d9805213SSascha Wildner /* The new pointer is just past the end of the last ccl.
209*d9805213SSascha Wildner * Since the cclmap points to the \first/ character of a
210*d9805213SSascha Wildner * ccl, adding the length of the ccl to the cclmap pointer
211*d9805213SSascha Wildner * will produce a cursor to the first free space.
212*d9805213SSascha Wildner */
213*d9805213SSascha Wildner cclmap[lastccl] =
214*d9805213SSascha Wildner cclmap[lastccl - 1] + ccllen[lastccl - 1];
215*d9805213SSascha Wildner
216*d9805213SSascha Wildner ccllen[lastccl] = 0;
217*d9805213SSascha Wildner cclng[lastccl] = 0; /* ccl's start out life un-negated */
218*d9805213SSascha Wildner ccl_has_nl[lastccl] = false;
219*d9805213SSascha Wildner
220*d9805213SSascha Wildner return lastccl;
221*d9805213SSascha Wildner }
222*d9805213SSascha Wildner
223*d9805213SSascha Wildner
224*d9805213SSascha Wildner /* cclnegate - negate the given ccl */
225*d9805213SSascha Wildner
cclnegate(int cclp)226*d9805213SSascha Wildner void cclnegate (int cclp)
227*d9805213SSascha Wildner {
228*d9805213SSascha Wildner cclng[cclp] = 1;
229*d9805213SSascha Wildner ccl_has_nl[cclp] = !ccl_has_nl[cclp];
230*d9805213SSascha Wildner }
231*d9805213SSascha Wildner
232*d9805213SSascha Wildner
233*d9805213SSascha Wildner /* list_character_set - list the members of a set of characters in CCL form
234*d9805213SSascha Wildner *
235*d9805213SSascha Wildner * Writes to the given file a character-class representation of those
236*d9805213SSascha Wildner * characters present in the given CCL. A character is present if it
237*d9805213SSascha Wildner * has a non-zero value in the cset array.
238*d9805213SSascha Wildner */
239*d9805213SSascha Wildner
list_character_set(FILE * file,int cset[])240*d9805213SSascha Wildner void list_character_set (FILE *file, int cset[])
241*d9805213SSascha Wildner {
242*d9805213SSascha Wildner int i;
243*d9805213SSascha Wildner
244*d9805213SSascha Wildner putc ('[', file);
245*d9805213SSascha Wildner
246*d9805213SSascha Wildner for (i = 0; i < csize; ++i) {
247*d9805213SSascha Wildner if (cset[i]) {
248*d9805213SSascha Wildner int start_char = i;
249*d9805213SSascha Wildner
250*d9805213SSascha Wildner putc (' ', file);
251*d9805213SSascha Wildner
252*d9805213SSascha Wildner fputs (readable_form (i), file);
253*d9805213SSascha Wildner
254*d9805213SSascha Wildner while (++i < csize && cset[i]) ;
255*d9805213SSascha Wildner
256*d9805213SSascha Wildner if (i - 1 > start_char)
257*d9805213SSascha Wildner /* this was a run */
258*d9805213SSascha Wildner fprintf (file, "-%s",
259*d9805213SSascha Wildner readable_form (i - 1));
260*d9805213SSascha Wildner
261*d9805213SSascha Wildner putc (' ', file);
262*d9805213SSascha Wildner }
263*d9805213SSascha Wildner }
264*d9805213SSascha Wildner
265*d9805213SSascha Wildner putc (']', file);
266*d9805213SSascha Wildner }
267*d9805213SSascha Wildner
268*d9805213SSascha Wildner /** Determines if the range [c1-c2] is unambiguous in a case-insensitive
269*d9805213SSascha Wildner * scanner. Specifically, if a lowercase or uppercase character, x, is in the
270*d9805213SSascha Wildner * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
271*d9805213SSascha Wildner * be in the range. If not, then this range is ambiguous, and the function
272*d9805213SSascha Wildner * returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that
273*d9805213SSascha Wildner * [a-z] will be labeled ambiguous because it does not include [A-Z].
274*d9805213SSascha Wildner *
275*d9805213SSascha Wildner * @param c1 the lower end of the range
276*d9805213SSascha Wildner * @param c2 the upper end of the range
277*d9805213SSascha Wildner * @return true if [c1-c2] is not ambiguous for a caseless scanner.
278*d9805213SSascha Wildner */
range_covers_case(int c1,int c2)279*d9805213SSascha Wildner bool range_covers_case (int c1, int c2)
280*d9805213SSascha Wildner {
281*d9805213SSascha Wildner int i, o;
282*d9805213SSascha Wildner
283*d9805213SSascha Wildner for (i = c1; i <= c2; i++) {
284*d9805213SSascha Wildner if (has_case (i)) {
285*d9805213SSascha Wildner o = reverse_case (i);
286*d9805213SSascha Wildner if (o < c1 || c2 < o)
287*d9805213SSascha Wildner return false;
288*d9805213SSascha Wildner }
289*d9805213SSascha Wildner }
290*d9805213SSascha Wildner return true;
291*d9805213SSascha Wildner }
292*d9805213SSascha Wildner
293*d9805213SSascha Wildner /** Reverse the case of a character, if possible.
294*d9805213SSascha Wildner * @return c if case-reversal does not apply.
295*d9805213SSascha Wildner */
reverse_case(int c)296*d9805213SSascha Wildner int reverse_case (int c)
297*d9805213SSascha Wildner {
298*d9805213SSascha Wildner return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);
299*d9805213SSascha Wildner }
300*d9805213SSascha Wildner
301*d9805213SSascha Wildner /** Return true if c is uppercase or lowercase. */
has_case(int c)302*d9805213SSascha Wildner bool has_case (int c)
303*d9805213SSascha Wildner {
304*d9805213SSascha Wildner return (isupper (c) || islower (c)) ? true : false;
305*d9805213SSascha Wildner }
306