xref: /netbsd-src/external/bsd/flex/dist/src/ccl.c (revision 7977e68669c3b784eb65c933a2bd99785638ae00)
1 /* ccl - routines for character classes */
2 
3 /*  Copyright (c) 1990 The Regents of the University of California. */
4 /*  All rights reserved. */
5 
6 /*  This code is derived from software contributed to Berkeley by */
7 /*  Vern Paxson. */
8 
9 /*  The United States Government has rights in this work pursuant */
10 /*  to contract no. DE-AC03-76SF00098 between the United States */
11  /*  Department of Energy and the University of California. */
12 
13 /*  This file is part of flex. */
14 
15 /*  Redistribution and use in source and binary forms, with or without */
16 /*  modification, are permitted provided that the following conditions */
17 /*  are met: */
18 
19 /*  1. Redistributions of source code must retain the above copyright */
20 /*     notice, this list of conditions and the following disclaimer. */
21 /*  2. Redistributions in binary form must reproduce the above copyright */
22 /*     notice, this list of conditions and the following disclaimer in the */
23 /*     documentation and/or other materials provided with the distribution. */
24 
25 /*  Neither the name of the University nor the names of its contributors */
26 /*  may be used to endorse or promote products derived from this software */
27 /*  without specific prior written permission. */
28 
29 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32 /*  PURPOSE. */
33 #include "flexdef.h"
34 __RCSID("$NetBSD: ccl.c,v 1.3 2017/01/02 17:45:27 christos Exp $");
35 
36 
37 /* return true if the chr is in the ccl. Takes negation into account. */
38 static bool
ccl_contains(const int cclp,const int ch)39 ccl_contains (const int cclp, const int ch)
40 {
41 	int     ind, len, i;
42 
43 	len = ccllen[cclp];
44 	ind = cclmap[cclp];
45 
46 	for (i = 0; i < len; ++i)
47 		if (ccltbl[ind + i] == ch)
48 			return !cclng[cclp];
49 
50     return cclng[cclp];
51 }
52 
53 
54 /* ccladd - add a single character to a ccl */
55 
ccladd(int cclp,int ch)56 void ccladd (int cclp, int ch)
57 {
58 	int     ind, len, newpos, i;
59 
60 	check_char (ch);
61 
62 	len = ccllen[cclp];
63 	ind = cclmap[cclp];
64 
65 	/* check to see if the character is already in the ccl */
66 
67 	for (i = 0; i < len; ++i)
68 		if (ccltbl[ind + i] == ch)
69 			return;
70 
71 	/* mark newlines */
72 	if (ch == nlch)
73 		ccl_has_nl[cclp] = true;
74 
75 	newpos = ind + len;
76 
77 	if (newpos >= current_max_ccl_tbl_size) {
78 		current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
79 
80 		++num_reallocs;
81 
82 		ccltbl = reallocate_Character_array (ccltbl,
83 						     current_max_ccl_tbl_size);
84 	}
85 
86 	ccllen[cclp] = len + 1;
87 	ccltbl[newpos] = (unsigned char) ch;
88 }
89 
90 /* dump_cclp - same thing as list_character_set, but for cclps.  */
91 
dump_cclp(FILE * file,int cclp)92 static void    dump_cclp (FILE* file, int cclp)
93 {
94 	int i;
95 
96 	putc ('[', file);
97 
98 	for (i = 0; i < csize; ++i) {
99 		if (ccl_contains(cclp, i)){
100 			int start_char = i;
101 
102 			putc (' ', file);
103 
104 			fputs (readable_form (i), file);
105 
106 			while (++i < csize && ccl_contains(cclp,i)) ;
107 
108 			if (i - 1 > start_char)
109 				/* this was a run */
110 				fprintf (file, "-%s",
111 					 readable_form (i - 1));
112 
113 			putc (' ', file);
114 		}
115 	}
116 
117 	putc (']', file);
118 }
119 
120 
121 
122 /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
123 int
ccl_set_diff(int a,int b)124 ccl_set_diff (int a, int b)
125 {
126     int  d, ch;
127 
128     /* create new class  */
129     d = cclinit();
130 
131     /* In order to handle negation, we spin through all possible chars,
132      * addding each char in a that is not in b.
133      * (This could be O(n^2), but n is small and bounded.)
134      */
135 	for ( ch = 0; ch < csize; ++ch )
136         if (ccl_contains (a, ch) && !ccl_contains(b, ch))
137             ccladd (d, ch);
138 
139     /* debug */
140     if (0){
141         fprintf(stderr, "ccl_set_diff (");
142             fprintf(stderr, "\n    ");
143             dump_cclp (stderr, a);
144             fprintf(stderr, "\n    ");
145             dump_cclp (stderr, b);
146             fprintf(stderr, "\n    ");
147             dump_cclp (stderr, d);
148         fprintf(stderr, "\n)\n");
149     }
150     return d;
151 }
152 
153 /* ccl_set_union - create a new ccl as the set union of the two given ccls. */
154 int
ccl_set_union(int a,int b)155 ccl_set_union (int a, int b)
156 {
157     int  d, i;
158 
159     /* create new class  */
160     d = cclinit();
161 
162     /* Add all of a */
163     for (i = 0; i < ccllen[a]; ++i)
164 		ccladd (d, ccltbl[cclmap[a] + i]);
165 
166     /* Add all of b */
167     for (i = 0; i < ccllen[b]; ++i)
168 		ccladd (d, ccltbl[cclmap[b] + i]);
169 
170     /* debug */
171     if (0){
172         fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
173             fprintf(stderr, "\n    ");
174             dump_cclp (stderr, a);
175             fprintf(stderr, "\n    ");
176             dump_cclp (stderr, b);
177             fprintf(stderr, "\n    ");
178             dump_cclp (stderr, d);
179         fprintf(stderr, "\n)\n");
180     }
181     return d;
182 }
183 
184 
185 /* cclinit - return an empty ccl */
186 
cclinit(void)187 int     cclinit (void)
188 {
189 	if (++lastccl >= current_maxccls) {
190 		current_maxccls += MAX_CCLS_INCREMENT;
191 
192 		++num_reallocs;
193 
194 		cclmap =
195 			reallocate_integer_array (cclmap, current_maxccls);
196 		ccllen =
197 			reallocate_integer_array (ccllen, current_maxccls);
198 		cclng = reallocate_integer_array (cclng, current_maxccls);
199 		ccl_has_nl =
200 			reallocate_bool_array (ccl_has_nl,
201 					       current_maxccls);
202 	}
203 
204 	if (lastccl == 1)
205 		/* we're making the first ccl */
206 		cclmap[lastccl] = 0;
207 
208 	else
209 		/* The new pointer is just past the end of the last ccl.
210 		 * Since the cclmap points to the \first/ character of a
211 		 * ccl, adding the length of the ccl to the cclmap pointer
212 		 * will produce a cursor to the first free space.
213 		 */
214 		cclmap[lastccl] =
215 			cclmap[lastccl - 1] + ccllen[lastccl - 1];
216 
217 	ccllen[lastccl] = 0;
218 	cclng[lastccl] = 0;	/* ccl's start out life un-negated */
219 	ccl_has_nl[lastccl] = false;
220 
221 	return lastccl;
222 }
223 
224 
225 /* cclnegate - negate the given ccl */
226 
cclnegate(int cclp)227 void    cclnegate (int cclp)
228 {
229 	cclng[cclp] = 1;
230 	ccl_has_nl[cclp] = !ccl_has_nl[cclp];
231 }
232 
233 
234 /* list_character_set - list the members of a set of characters in CCL form
235  *
236  * Writes to the given file a character-class representation of those
237  * characters present in the given CCL.  A character is present if it
238  * has a non-zero value in the cset array.
239  */
240 
list_character_set(FILE * file,int cset[])241 void    list_character_set (FILE *file, int cset[])
242 {
243 	int i;
244 
245 	putc ('[', file);
246 
247 	for (i = 0; i < csize; ++i) {
248 		if (cset[i]) {
249 			int start_char = i;
250 
251 			putc (' ', file);
252 
253 			fputs (readable_form (i), file);
254 
255 			while (++i < csize && cset[i]) ;
256 
257 			if (i - 1 > start_char)
258 				/* this was a run */
259 				fprintf (file, "-%s",
260 					 readable_form (i - 1));
261 
262 			putc (' ', file);
263 		}
264 	}
265 
266 	putc (']', file);
267 }
268 
269 /** Determines if the range [c1-c2] is unambiguous in a case-insensitive
270  * scanner.  Specifically, if a lowercase or uppercase character, x, is in the
271  * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
272  * be in the range. If not, then this range is ambiguous, and the function
273  * returns false.  For example, [@-_] spans [a-z] but not [A-Z].  Beware that
274  * [a-z] will be labeled ambiguous because it does not include [A-Z].
275  *
276  * @param c1 the lower end of the range
277  * @param c2 the upper end of the range
278  * @return true if [c1-c2] is not ambiguous for a caseless scanner.
279  */
range_covers_case(int c1,int c2)280 bool range_covers_case (int c1, int c2)
281 {
282 	int     i, o;
283 
284 	for (i = c1; i <= c2; i++) {
285 		if (has_case (i)) {
286 			o = reverse_case (i);
287 			if (o < c1 || c2 < o)
288 				return false;
289 		}
290 	}
291 	return true;
292 }
293 
294 /** Reverse the case of a character, if possible.
295  * @return c if case-reversal does not apply.
296  */
reverse_case(int c)297 int reverse_case (int c)
298 {
299 	return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);
300 }
301 
302 /** Return true if c is uppercase or lowercase. */
has_case(int c)303 bool has_case (int c)
304 {
305 	return (isupper (c) || islower (c)) ? true : false;
306 }
307