xref: /dflybsd-src/contrib/flex/src/ccl.c (revision 388e4ddaf1c230f115961bdb4bad6a8d3e017c93)
1*d9805213SSascha Wildner /* ccl - routines for character classes */
2*d9805213SSascha Wildner 
3*d9805213SSascha Wildner /*  Copyright (c) 1990 The Regents of the University of California. */
4*d9805213SSascha Wildner /*  All rights reserved. */
5*d9805213SSascha Wildner 
6*d9805213SSascha Wildner /*  This code is derived from software contributed to Berkeley by */
7*d9805213SSascha Wildner /*  Vern Paxson. */
8*d9805213SSascha Wildner 
9*d9805213SSascha Wildner /*  The United States Government has rights in this work pursuant */
10*d9805213SSascha Wildner /*  to contract no. DE-AC03-76SF00098 between the United States */
11*d9805213SSascha Wildner  /*  Department of Energy and the University of California. */
12*d9805213SSascha Wildner 
13*d9805213SSascha Wildner /*  This file is part of flex. */
14*d9805213SSascha Wildner 
15*d9805213SSascha Wildner /*  Redistribution and use in source and binary forms, with or without */
16*d9805213SSascha Wildner /*  modification, are permitted provided that the following conditions */
17*d9805213SSascha Wildner /*  are met: */
18*d9805213SSascha Wildner 
19*d9805213SSascha Wildner /*  1. Redistributions of source code must retain the above copyright */
20*d9805213SSascha Wildner /*     notice, this list of conditions and the following disclaimer. */
21*d9805213SSascha Wildner /*  2. Redistributions in binary form must reproduce the above copyright */
22*d9805213SSascha Wildner /*     notice, this list of conditions and the following disclaimer in the */
23*d9805213SSascha Wildner /*     documentation and/or other materials provided with the distribution. */
24*d9805213SSascha Wildner 
25*d9805213SSascha Wildner /*  Neither the name of the University nor the names of its contributors */
26*d9805213SSascha Wildner /*  may be used to endorse or promote products derived from this software */
27*d9805213SSascha Wildner /*  without specific prior written permission. */
28*d9805213SSascha Wildner 
29*d9805213SSascha Wildner /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30*d9805213SSascha Wildner /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31*d9805213SSascha Wildner /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32*d9805213SSascha Wildner /*  PURPOSE. */
33*d9805213SSascha Wildner 
34*d9805213SSascha Wildner #include "flexdef.h"
35*d9805213SSascha Wildner 
36*d9805213SSascha Wildner /* return true if the chr is in the ccl. Takes negation into account. */
37*d9805213SSascha Wildner static bool
ccl_contains(const int cclp,const int ch)38*d9805213SSascha Wildner ccl_contains (const int cclp, const int ch)
39*d9805213SSascha Wildner {
40*d9805213SSascha Wildner 	int     ind, len, i;
41*d9805213SSascha Wildner 
42*d9805213SSascha Wildner 	len = ccllen[cclp];
43*d9805213SSascha Wildner 	ind = cclmap[cclp];
44*d9805213SSascha Wildner 
45*d9805213SSascha Wildner 	for (i = 0; i < len; ++i)
46*d9805213SSascha Wildner 		if (ccltbl[ind + i] == ch)
47*d9805213SSascha Wildner 			return !cclng[cclp];
48*d9805213SSascha Wildner 
49*d9805213SSascha Wildner     return cclng[cclp];
50*d9805213SSascha Wildner }
51*d9805213SSascha Wildner 
52*d9805213SSascha Wildner 
53*d9805213SSascha Wildner /* ccladd - add a single character to a ccl */
54*d9805213SSascha Wildner 
ccladd(int cclp,int ch)55*d9805213SSascha Wildner void ccladd (int cclp, int ch)
56*d9805213SSascha Wildner {
57*d9805213SSascha Wildner 	int     ind, len, newpos, i;
58*d9805213SSascha Wildner 
59*d9805213SSascha Wildner 	check_char (ch);
60*d9805213SSascha Wildner 
61*d9805213SSascha Wildner 	len = ccllen[cclp];
62*d9805213SSascha Wildner 	ind = cclmap[cclp];
63*d9805213SSascha Wildner 
64*d9805213SSascha Wildner 	/* check to see if the character is already in the ccl */
65*d9805213SSascha Wildner 
66*d9805213SSascha Wildner 	for (i = 0; i < len; ++i)
67*d9805213SSascha Wildner 		if (ccltbl[ind + i] == ch)
68*d9805213SSascha Wildner 			return;
69*d9805213SSascha Wildner 
70*d9805213SSascha Wildner 	/* mark newlines */
71*d9805213SSascha Wildner 	if (ch == nlch)
72*d9805213SSascha Wildner 		ccl_has_nl[cclp] = true;
73*d9805213SSascha Wildner 
74*d9805213SSascha Wildner 	newpos = ind + len;
75*d9805213SSascha Wildner 
76*d9805213SSascha Wildner 	if (newpos >= current_max_ccl_tbl_size) {
77*d9805213SSascha Wildner 		current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
78*d9805213SSascha Wildner 
79*d9805213SSascha Wildner 		++num_reallocs;
80*d9805213SSascha Wildner 
81*d9805213SSascha Wildner 		ccltbl = reallocate_Character_array (ccltbl,
82*d9805213SSascha Wildner 						     current_max_ccl_tbl_size);
83*d9805213SSascha Wildner 	}
84*d9805213SSascha Wildner 
85*d9805213SSascha Wildner 	ccllen[cclp] = len + 1;
86*d9805213SSascha Wildner 	ccltbl[newpos] = (unsigned char) ch;
87*d9805213SSascha Wildner }
88*d9805213SSascha Wildner 
89*d9805213SSascha Wildner /* dump_cclp - same thing as list_character_set, but for cclps.  */
90*d9805213SSascha Wildner 
dump_cclp(FILE * file,int cclp)91*d9805213SSascha Wildner static void    dump_cclp (FILE* file, int cclp)
92*d9805213SSascha Wildner {
93*d9805213SSascha Wildner 	int i;
94*d9805213SSascha Wildner 
95*d9805213SSascha Wildner 	putc ('[', file);
96*d9805213SSascha Wildner 
97*d9805213SSascha Wildner 	for (i = 0; i < csize; ++i) {
98*d9805213SSascha Wildner 		if (ccl_contains(cclp, i)){
99*d9805213SSascha Wildner 			int start_char = i;
100*d9805213SSascha Wildner 
101*d9805213SSascha Wildner 			putc (' ', file);
102*d9805213SSascha Wildner 
103*d9805213SSascha Wildner 			fputs (readable_form (i), file);
104*d9805213SSascha Wildner 
105*d9805213SSascha Wildner 			while (++i < csize && ccl_contains(cclp,i)) ;
106*d9805213SSascha Wildner 
107*d9805213SSascha Wildner 			if (i - 1 > start_char)
108*d9805213SSascha Wildner 				/* this was a run */
109*d9805213SSascha Wildner 				fprintf (file, "-%s",
110*d9805213SSascha Wildner 					 readable_form (i - 1));
111*d9805213SSascha Wildner 
112*d9805213SSascha Wildner 			putc (' ', file);
113*d9805213SSascha Wildner 		}
114*d9805213SSascha Wildner 	}
115*d9805213SSascha Wildner 
116*d9805213SSascha Wildner 	putc (']', file);
117*d9805213SSascha Wildner }
118*d9805213SSascha Wildner 
119*d9805213SSascha Wildner 
120*d9805213SSascha Wildner 
121*d9805213SSascha Wildner /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
122*d9805213SSascha Wildner int
ccl_set_diff(int a,int b)123*d9805213SSascha Wildner ccl_set_diff (int a, int b)
124*d9805213SSascha Wildner {
125*d9805213SSascha Wildner     int  d, ch;
126*d9805213SSascha Wildner 
127*d9805213SSascha Wildner     /* create new class  */
128*d9805213SSascha Wildner     d = cclinit();
129*d9805213SSascha Wildner 
130*d9805213SSascha Wildner     /* In order to handle negation, we spin through all possible chars,
131*d9805213SSascha Wildner      * addding each char in a that is not in b.
132*d9805213SSascha Wildner      * (This could be O(n^2), but n is small and bounded.)
133*d9805213SSascha Wildner      */
134*d9805213SSascha Wildner 	for ( ch = 0; ch < csize; ++ch )
135*d9805213SSascha Wildner         if (ccl_contains (a, ch) && !ccl_contains(b, ch))
136*d9805213SSascha Wildner             ccladd (d, ch);
137*d9805213SSascha Wildner 
138*d9805213SSascha Wildner     /* debug */
139*d9805213SSascha Wildner     if (0){
140*d9805213SSascha Wildner         fprintf(stderr, "ccl_set_diff (");
141*d9805213SSascha Wildner             fprintf(stderr, "\n    ");
142*d9805213SSascha Wildner             dump_cclp (stderr, a);
143*d9805213SSascha Wildner             fprintf(stderr, "\n    ");
144*d9805213SSascha Wildner             dump_cclp (stderr, b);
145*d9805213SSascha Wildner             fprintf(stderr, "\n    ");
146*d9805213SSascha Wildner             dump_cclp (stderr, d);
147*d9805213SSascha Wildner         fprintf(stderr, "\n)\n");
148*d9805213SSascha Wildner     }
149*d9805213SSascha Wildner     return d;
150*d9805213SSascha Wildner }
151*d9805213SSascha Wildner 
152*d9805213SSascha Wildner /* ccl_set_union - create a new ccl as the set union of the two given ccls. */
153*d9805213SSascha Wildner int
ccl_set_union(int a,int b)154*d9805213SSascha Wildner ccl_set_union (int a, int b)
155*d9805213SSascha Wildner {
156*d9805213SSascha Wildner     int  d, i;
157*d9805213SSascha Wildner 
158*d9805213SSascha Wildner     /* create new class  */
159*d9805213SSascha Wildner     d = cclinit();
160*d9805213SSascha Wildner 
161*d9805213SSascha Wildner     /* Add all of a */
162*d9805213SSascha Wildner     for (i = 0; i < ccllen[a]; ++i)
163*d9805213SSascha Wildner 		ccladd (d, ccltbl[cclmap[a] + i]);
164*d9805213SSascha Wildner 
165*d9805213SSascha Wildner     /* Add all of b */
166*d9805213SSascha Wildner     for (i = 0; i < ccllen[b]; ++i)
167*d9805213SSascha Wildner 		ccladd (d, ccltbl[cclmap[b] + i]);
168*d9805213SSascha Wildner 
169*d9805213SSascha Wildner     /* debug */
170*d9805213SSascha Wildner     if (0){
171*d9805213SSascha Wildner         fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
172*d9805213SSascha Wildner             fprintf(stderr, "\n    ");
173*d9805213SSascha Wildner             dump_cclp (stderr, a);
174*d9805213SSascha Wildner             fprintf(stderr, "\n    ");
175*d9805213SSascha Wildner             dump_cclp (stderr, b);
176*d9805213SSascha Wildner             fprintf(stderr, "\n    ");
177*d9805213SSascha Wildner             dump_cclp (stderr, d);
178*d9805213SSascha Wildner         fprintf(stderr, "\n)\n");
179*d9805213SSascha Wildner     }
180*d9805213SSascha Wildner     return d;
181*d9805213SSascha Wildner }
182*d9805213SSascha Wildner 
183*d9805213SSascha Wildner 
184*d9805213SSascha Wildner /* cclinit - return an empty ccl */
185*d9805213SSascha Wildner 
cclinit(void)186*d9805213SSascha Wildner int     cclinit (void)
187*d9805213SSascha Wildner {
188*d9805213SSascha Wildner 	if (++lastccl >= current_maxccls) {
189*d9805213SSascha Wildner 		current_maxccls += MAX_CCLS_INCREMENT;
190*d9805213SSascha Wildner 
191*d9805213SSascha Wildner 		++num_reallocs;
192*d9805213SSascha Wildner 
193*d9805213SSascha Wildner 		cclmap =
194*d9805213SSascha Wildner 			reallocate_integer_array (cclmap, current_maxccls);
195*d9805213SSascha Wildner 		ccllen =
196*d9805213SSascha Wildner 			reallocate_integer_array (ccllen, current_maxccls);
197*d9805213SSascha Wildner 		cclng = reallocate_integer_array (cclng, current_maxccls);
198*d9805213SSascha Wildner 		ccl_has_nl =
199*d9805213SSascha Wildner 			reallocate_bool_array (ccl_has_nl,
200*d9805213SSascha Wildner 					       current_maxccls);
201*d9805213SSascha Wildner 	}
202*d9805213SSascha Wildner 
203*d9805213SSascha Wildner 	if (lastccl == 1)
204*d9805213SSascha Wildner 		/* we're making the first ccl */
205*d9805213SSascha Wildner 		cclmap[lastccl] = 0;
206*d9805213SSascha Wildner 
207*d9805213SSascha Wildner 	else
208*d9805213SSascha Wildner 		/* The new pointer is just past the end of the last ccl.
209*d9805213SSascha Wildner 		 * Since the cclmap points to the \first/ character of a
210*d9805213SSascha Wildner 		 * ccl, adding the length of the ccl to the cclmap pointer
211*d9805213SSascha Wildner 		 * will produce a cursor to the first free space.
212*d9805213SSascha Wildner 		 */
213*d9805213SSascha Wildner 		cclmap[lastccl] =
214*d9805213SSascha Wildner 			cclmap[lastccl - 1] + ccllen[lastccl - 1];
215*d9805213SSascha Wildner 
216*d9805213SSascha Wildner 	ccllen[lastccl] = 0;
217*d9805213SSascha Wildner 	cclng[lastccl] = 0;	/* ccl's start out life un-negated */
218*d9805213SSascha Wildner 	ccl_has_nl[lastccl] = false;
219*d9805213SSascha Wildner 
220*d9805213SSascha Wildner 	return lastccl;
221*d9805213SSascha Wildner }
222*d9805213SSascha Wildner 
223*d9805213SSascha Wildner 
224*d9805213SSascha Wildner /* cclnegate - negate the given ccl */
225*d9805213SSascha Wildner 
cclnegate(int cclp)226*d9805213SSascha Wildner void    cclnegate (int cclp)
227*d9805213SSascha Wildner {
228*d9805213SSascha Wildner 	cclng[cclp] = 1;
229*d9805213SSascha Wildner 	ccl_has_nl[cclp] = !ccl_has_nl[cclp];
230*d9805213SSascha Wildner }
231*d9805213SSascha Wildner 
232*d9805213SSascha Wildner 
233*d9805213SSascha Wildner /* list_character_set - list the members of a set of characters in CCL form
234*d9805213SSascha Wildner  *
235*d9805213SSascha Wildner  * Writes to the given file a character-class representation of those
236*d9805213SSascha Wildner  * characters present in the given CCL.  A character is present if it
237*d9805213SSascha Wildner  * has a non-zero value in the cset array.
238*d9805213SSascha Wildner  */
239*d9805213SSascha Wildner 
list_character_set(FILE * file,int cset[])240*d9805213SSascha Wildner void    list_character_set (FILE *file, int cset[])
241*d9805213SSascha Wildner {
242*d9805213SSascha Wildner 	int i;
243*d9805213SSascha Wildner 
244*d9805213SSascha Wildner 	putc ('[', file);
245*d9805213SSascha Wildner 
246*d9805213SSascha Wildner 	for (i = 0; i < csize; ++i) {
247*d9805213SSascha Wildner 		if (cset[i]) {
248*d9805213SSascha Wildner 			int start_char = i;
249*d9805213SSascha Wildner 
250*d9805213SSascha Wildner 			putc (' ', file);
251*d9805213SSascha Wildner 
252*d9805213SSascha Wildner 			fputs (readable_form (i), file);
253*d9805213SSascha Wildner 
254*d9805213SSascha Wildner 			while (++i < csize && cset[i]) ;
255*d9805213SSascha Wildner 
256*d9805213SSascha Wildner 			if (i - 1 > start_char)
257*d9805213SSascha Wildner 				/* this was a run */
258*d9805213SSascha Wildner 				fprintf (file, "-%s",
259*d9805213SSascha Wildner 					 readable_form (i - 1));
260*d9805213SSascha Wildner 
261*d9805213SSascha Wildner 			putc (' ', file);
262*d9805213SSascha Wildner 		}
263*d9805213SSascha Wildner 	}
264*d9805213SSascha Wildner 
265*d9805213SSascha Wildner 	putc (']', file);
266*d9805213SSascha Wildner }
267*d9805213SSascha Wildner 
268*d9805213SSascha Wildner /** Determines if the range [c1-c2] is unambiguous in a case-insensitive
269*d9805213SSascha Wildner  * scanner.  Specifically, if a lowercase or uppercase character, x, is in the
270*d9805213SSascha Wildner  * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
271*d9805213SSascha Wildner  * be in the range. If not, then this range is ambiguous, and the function
272*d9805213SSascha Wildner  * returns false.  For example, [@-_] spans [a-z] but not [A-Z].  Beware that
273*d9805213SSascha Wildner  * [a-z] will be labeled ambiguous because it does not include [A-Z].
274*d9805213SSascha Wildner  *
275*d9805213SSascha Wildner  * @param c1 the lower end of the range
276*d9805213SSascha Wildner  * @param c2 the upper end of the range
277*d9805213SSascha Wildner  * @return true if [c1-c2] is not ambiguous for a caseless scanner.
278*d9805213SSascha Wildner  */
range_covers_case(int c1,int c2)279*d9805213SSascha Wildner bool range_covers_case (int c1, int c2)
280*d9805213SSascha Wildner {
281*d9805213SSascha Wildner 	int     i, o;
282*d9805213SSascha Wildner 
283*d9805213SSascha Wildner 	for (i = c1; i <= c2; i++) {
284*d9805213SSascha Wildner 		if (has_case (i)) {
285*d9805213SSascha Wildner 			o = reverse_case (i);
286*d9805213SSascha Wildner 			if (o < c1 || c2 < o)
287*d9805213SSascha Wildner 				return false;
288*d9805213SSascha Wildner 		}
289*d9805213SSascha Wildner 	}
290*d9805213SSascha Wildner 	return true;
291*d9805213SSascha Wildner }
292*d9805213SSascha Wildner 
293*d9805213SSascha Wildner /** Reverse the case of a character, if possible.
294*d9805213SSascha Wildner  * @return c if case-reversal does not apply.
295*d9805213SSascha Wildner  */
reverse_case(int c)296*d9805213SSascha Wildner int reverse_case (int c)
297*d9805213SSascha Wildner {
298*d9805213SSascha Wildner 	return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);
299*d9805213SSascha Wildner }
300*d9805213SSascha Wildner 
301*d9805213SSascha Wildner /** Return true if c is uppercase or lowercase. */
has_case(int c)302*d9805213SSascha Wildner bool has_case (int c)
303*d9805213SSascha Wildner {
304*d9805213SSascha Wildner 	return (isupper (c) || islower (c)) ? true : false;
305*d9805213SSascha Wildner }
306