xref: /minix3/external/bsd/flex/dist/ccl.c (revision f14fb602092e015ff630df58e17c2a9cd57d29b3)
1 /*	$NetBSD: ccl.c,v 1.1.1.1 2009/10/26 00:25:06 christos Exp $	*/
2 
3 /* ccl - routines for character classes */
4 
5 /*  Copyright (c) 1990 The Regents of the University of California. */
6 /*  All rights reserved. */
7 
8 /*  This code is derived from software contributed to Berkeley by */
9 /*  Vern Paxson. */
10 
11 /*  The United States Government has rights in this work pursuant */
12 /*  to contract no. DE-AC03-76SF00098 between the United States */
13  /*  Department of Energy and the University of California. */
14 
15 /*  This file is part of flex. */
16 
17 /*  Redistribution and use in source and binary forms, with or without */
18 /*  modification, are permitted provided that the following conditions */
19 /*  are met: */
20 
21 /*  1. Redistributions of source code must retain the above copyright */
22 /*     notice, this list of conditions and the following disclaimer. */
23 /*  2. Redistributions in binary form must reproduce the above copyright */
24 /*     notice, this list of conditions and the following disclaimer in the */
25 /*     documentation and/or other materials provided with the distribution. */
26 
27 /*  Neither the name of the University nor the names of its contributors */
28 /*  may be used to endorse or promote products derived from this software */
29 /*  without specific prior written permission. */
30 
31 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
32 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
33 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
34 /*  PURPOSE. */
35 
36 #include "flexdef.h"
37 
38 /* return true if the chr is in the ccl. Takes negation into account. */
39 static bool
40 ccl_contains (const int cclp, const int ch)
41 {
42 	int     ind, len, i;
43 
44 	len = ccllen[cclp];
45 	ind = cclmap[cclp];
46 
47 	for (i = 0; i < len; ++i)
48 		if (ccltbl[ind + i] == ch)
49 			return !cclng[cclp];
50 
51     return cclng[cclp];
52 }
53 
54 
55 /* ccladd - add a single character to a ccl */
56 
57 void    ccladd (cclp, ch)
58      int     cclp;
59      int     ch;
60 {
61 	int     ind, len, newpos, i;
62 
63 	check_char (ch);
64 
65 	len = ccllen[cclp];
66 	ind = cclmap[cclp];
67 
68 	/* check to see if the character is already in the ccl */
69 
70 	for (i = 0; i < len; ++i)
71 		if (ccltbl[ind + i] == ch)
72 			return;
73 
74 	/* mark newlines */
75 	if (ch == nlch)
76 		ccl_has_nl[cclp] = true;
77 
78 	newpos = ind + len;
79 
80 	if (newpos >= current_max_ccl_tbl_size) {
81 		current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
82 
83 		++num_reallocs;
84 
85 		ccltbl = reallocate_Character_array (ccltbl,
86 						     current_max_ccl_tbl_size);
87 	}
88 
89 	ccllen[cclp] = len + 1;
90 	ccltbl[newpos] = ch;
91 }
92 
93 /* dump_cclp - same thing as list_character_set, but for cclps.  */
94 
95 static void    dump_cclp (FILE* file, int cclp)
96 {
97 	register int i;
98 
99 	putc ('[', file);
100 
101 	for (i = 0; i < csize; ++i) {
102 		if (ccl_contains(cclp, i)){
103 			register int start_char = i;
104 
105 			putc (' ', file);
106 
107 			fputs (readable_form (i), file);
108 
109 			while (++i < csize && ccl_contains(cclp,i)) ;
110 
111 			if (i - 1 > start_char)
112 				/* this was a run */
113 				fprintf (file, "-%s",
114 					 readable_form (i - 1));
115 
116 			putc (' ', file);
117 		}
118 	}
119 
120 	putc (']', file);
121 }
122 
123 
124 
125 /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
126 int
127 ccl_set_diff (int a, int b)
128 {
129     int  d, ch;
130 
131     /* create new class  */
132     d = cclinit();
133 
134     /* In order to handle negation, we spin through all possible chars,
135      * addding each char in a that is not in b.
136      * (This could be O(n^2), but n is small and bounded.)
137      */
138 	for ( ch = 0; ch < csize; ++ch )
139         if (ccl_contains (a, ch) && !ccl_contains(b, ch))
140             ccladd (d, ch);
141 
142     /* debug */
143     if (0){
144         fprintf(stderr, "ccl_set_diff (");
145             fprintf(stderr, "\n    ");
146             dump_cclp (stderr, a);
147             fprintf(stderr, "\n    ");
148             dump_cclp (stderr, b);
149             fprintf(stderr, "\n    ");
150             dump_cclp (stderr, d);
151         fprintf(stderr, "\n)\n");
152     }
153     return d;
154 }
155 
156 /* ccl_set_union - create a new ccl as the set union of the two given ccls. */
157 int
158 ccl_set_union (int a, int b)
159 {
160     int  d, i;
161 
162     /* create new class  */
163     d = cclinit();
164 
165     /* Add all of a */
166     for (i = 0; i < ccllen[a]; ++i)
167 		ccladd (d, ccltbl[cclmap[a] + i]);
168 
169     /* Add all of b */
170     for (i = 0; i < ccllen[b]; ++i)
171 		ccladd (d, ccltbl[cclmap[b] + i]);
172 
173     /* debug */
174     if (0){
175         fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
176             fprintf(stderr, "\n    ");
177             dump_cclp (stderr, a);
178             fprintf(stderr, "\n    ");
179             dump_cclp (stderr, b);
180             fprintf(stderr, "\n    ");
181             dump_cclp (stderr, d);
182         fprintf(stderr, "\n)\n");
183     }
184     return d;
185 }
186 
187 
188 /* cclinit - return an empty ccl */
189 
190 int     cclinit ()
191 {
192 	if (++lastccl >= current_maxccls) {
193 		current_maxccls += MAX_CCLS_INCREMENT;
194 
195 		++num_reallocs;
196 
197 		cclmap =
198 			reallocate_integer_array (cclmap, current_maxccls);
199 		ccllen =
200 			reallocate_integer_array (ccllen, current_maxccls);
201 		cclng = reallocate_integer_array (cclng, current_maxccls);
202 		ccl_has_nl =
203 			reallocate_bool_array (ccl_has_nl,
204 					       current_maxccls);
205 	}
206 
207 	if (lastccl == 1)
208 		/* we're making the first ccl */
209 		cclmap[lastccl] = 0;
210 
211 	else
212 		/* The new pointer is just past the end of the last ccl.
213 		 * Since the cclmap points to the \first/ character of a
214 		 * ccl, adding the length of the ccl to the cclmap pointer
215 		 * will produce a cursor to the first free space.
216 		 */
217 		cclmap[lastccl] =
218 			cclmap[lastccl - 1] + ccllen[lastccl - 1];
219 
220 	ccllen[lastccl] = 0;
221 	cclng[lastccl] = 0;	/* ccl's start out life un-negated */
222 	ccl_has_nl[lastccl] = false;
223 
224 	return lastccl;
225 }
226 
227 
228 /* cclnegate - negate the given ccl */
229 
230 void    cclnegate (cclp)
231      int     cclp;
232 {
233 	cclng[cclp] = 1;
234 	ccl_has_nl[cclp] = !ccl_has_nl[cclp];
235 }
236 
237 
238 /* list_character_set - list the members of a set of characters in CCL form
239  *
240  * Writes to the given file a character-class representation of those
241  * characters present in the given CCL.  A character is present if it
242  * has a non-zero value in the cset array.
243  */
244 
245 void    list_character_set (file, cset)
246      FILE   *file;
247      int     cset[];
248 {
249 	register int i;
250 
251 	putc ('[', file);
252 
253 	for (i = 0; i < csize; ++i) {
254 		if (cset[i]) {
255 			register int start_char = i;
256 
257 			putc (' ', file);
258 
259 			fputs (readable_form (i), file);
260 
261 			while (++i < csize && cset[i]) ;
262 
263 			if (i - 1 > start_char)
264 				/* this was a run */
265 				fprintf (file, "-%s",
266 					 readable_form (i - 1));
267 
268 			putc (' ', file);
269 		}
270 	}
271 
272 	putc (']', file);
273 }
274 
275 /** Determines if the range [c1-c2] is unambiguous in a case-insensitive
276  * scanner.  Specifically, if a lowercase or uppercase character, x, is in the
277  * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
278  * be in the range. If not, then this range is ambiguous, and the function
279  * returns false.  For example, [@-_] spans [a-z] but not [A-Z].  Beware that
280  * [a-z] will be labeled ambiguous because it does not include [A-Z].
281  *
282  * @param c1 the lower end of the range
283  * @param c2 the upper end of the range
284  * @return true if [c1-c2] is not ambiguous for a caseless scanner.
285  */
286 bool range_covers_case (int c1, int c2)
287 {
288 	int     i, o;
289 
290 	for (i = c1; i <= c2; i++) {
291 		if (has_case (i)) {
292 			o = reverse_case (i);
293 			if (o < c1 || c2 < o)
294 				return false;
295 		}
296 	}
297 	return true;
298 }
299 
300 /** Reverse the case of a character, if possible.
301  * @return c if case-reversal does not apply.
302  */
303 int reverse_case (int c)
304 {
305 	return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);
306 }
307 
308 /** Return true if c is uppercase or lowercase. */
309 bool has_case (int c)
310 {
311 	return (isupper (c) || islower (c)) ? true : false;
312 }
313