xref: /minix3/external/bsd/flex/dist/ccl.c (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1357f1050SThomas Veerman /* ccl - routines for character classes */
2357f1050SThomas Veerman 
3357f1050SThomas Veerman /*  Copyright (c) 1990 The Regents of the University of California. */
4357f1050SThomas Veerman /*  All rights reserved. */
5357f1050SThomas Veerman 
6357f1050SThomas Veerman /*  This code is derived from software contributed to Berkeley by */
7357f1050SThomas Veerman /*  Vern Paxson. */
8357f1050SThomas Veerman 
9357f1050SThomas Veerman /*  The United States Government has rights in this work pursuant */
10357f1050SThomas Veerman /*  to contract no. DE-AC03-76SF00098 between the United States */
11357f1050SThomas Veerman  /*  Department of Energy and the University of California. */
12357f1050SThomas Veerman 
13357f1050SThomas Veerman /*  This file is part of flex. */
14357f1050SThomas Veerman 
15357f1050SThomas Veerman /*  Redistribution and use in source and binary forms, with or without */
16357f1050SThomas Veerman /*  modification, are permitted provided that the following conditions */
17357f1050SThomas Veerman /*  are met: */
18357f1050SThomas Veerman 
19357f1050SThomas Veerman /*  1. Redistributions of source code must retain the above copyright */
20357f1050SThomas Veerman /*     notice, this list of conditions and the following disclaimer. */
21357f1050SThomas Veerman /*  2. Redistributions in binary form must reproduce the above copyright */
22357f1050SThomas Veerman /*     notice, this list of conditions and the following disclaimer in the */
23357f1050SThomas Veerman /*     documentation and/or other materials provided with the distribution. */
24357f1050SThomas Veerman 
25357f1050SThomas Veerman /*  Neither the name of the University nor the names of its contributors */
26357f1050SThomas Veerman /*  may be used to endorse or promote products derived from this software */
27357f1050SThomas Veerman /*  without specific prior written permission. */
28357f1050SThomas Veerman 
29357f1050SThomas Veerman /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30357f1050SThomas Veerman /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31357f1050SThomas Veerman /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32357f1050SThomas Veerman /*  PURPOSE. */
33357f1050SThomas Veerman #include "flexdef.h"
34*0a6a1f1dSLionel Sambuc __RCSID("$NetBSD: ccl.c,v 1.3 2014/10/30 18:44:05 christos Exp $");
35*0a6a1f1dSLionel Sambuc 
36357f1050SThomas Veerman 
37357f1050SThomas Veerman /* return true if the chr is in the ccl. Takes negation into account. */
38357f1050SThomas Veerman static bool
ccl_contains(const int cclp,const int ch)39357f1050SThomas Veerman ccl_contains (const int cclp, const int ch)
40357f1050SThomas Veerman {
41357f1050SThomas Veerman 	int     ind, len, i;
42357f1050SThomas Veerman 
43357f1050SThomas Veerman 	len = ccllen[cclp];
44357f1050SThomas Veerman 	ind = cclmap[cclp];
45357f1050SThomas Veerman 
46357f1050SThomas Veerman 	for (i = 0; i < len; ++i)
47357f1050SThomas Veerman 		if (ccltbl[ind + i] == ch)
48357f1050SThomas Veerman 			return !cclng[cclp];
49357f1050SThomas Veerman 
50357f1050SThomas Veerman     return cclng[cclp];
51357f1050SThomas Veerman }
52357f1050SThomas Veerman 
53357f1050SThomas Veerman 
54357f1050SThomas Veerman /* ccladd - add a single character to a ccl */
55357f1050SThomas Veerman 
ccladd(cclp,ch)56357f1050SThomas Veerman void    ccladd (cclp, ch)
57357f1050SThomas Veerman      int     cclp;
58357f1050SThomas Veerman      int     ch;
59357f1050SThomas Veerman {
60357f1050SThomas Veerman 	int     ind, len, newpos, i;
61357f1050SThomas Veerman 
62357f1050SThomas Veerman 	check_char (ch);
63357f1050SThomas Veerman 
64357f1050SThomas Veerman 	len = ccllen[cclp];
65357f1050SThomas Veerman 	ind = cclmap[cclp];
66357f1050SThomas Veerman 
67357f1050SThomas Veerman 	/* check to see if the character is already in the ccl */
68357f1050SThomas Veerman 
69357f1050SThomas Veerman 	for (i = 0; i < len; ++i)
70357f1050SThomas Veerman 		if (ccltbl[ind + i] == ch)
71357f1050SThomas Veerman 			return;
72357f1050SThomas Veerman 
73357f1050SThomas Veerman 	/* mark newlines */
74357f1050SThomas Veerman 	if (ch == nlch)
75357f1050SThomas Veerman 		ccl_has_nl[cclp] = true;
76357f1050SThomas Veerman 
77357f1050SThomas Veerman 	newpos = ind + len;
78357f1050SThomas Veerman 
79357f1050SThomas Veerman 	if (newpos >= current_max_ccl_tbl_size) {
80357f1050SThomas Veerman 		current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
81357f1050SThomas Veerman 
82357f1050SThomas Veerman 		++num_reallocs;
83357f1050SThomas Veerman 
84357f1050SThomas Veerman 		ccltbl = reallocate_Character_array (ccltbl,
85357f1050SThomas Veerman 						     current_max_ccl_tbl_size);
86357f1050SThomas Veerman 	}
87357f1050SThomas Veerman 
88357f1050SThomas Veerman 	ccllen[cclp] = len + 1;
89357f1050SThomas Veerman 	ccltbl[newpos] = ch;
90357f1050SThomas Veerman }
91357f1050SThomas Veerman 
92357f1050SThomas Veerman /* dump_cclp - same thing as list_character_set, but for cclps.  */
93357f1050SThomas Veerman 
dump_cclp(FILE * file,int cclp)94357f1050SThomas Veerman static void    dump_cclp (FILE* file, int cclp)
95357f1050SThomas Veerman {
96357f1050SThomas Veerman 	register int i;
97357f1050SThomas Veerman 
98357f1050SThomas Veerman 	putc ('[', file);
99357f1050SThomas Veerman 
100357f1050SThomas Veerman 	for (i = 0; i < csize; ++i) {
101357f1050SThomas Veerman 		if (ccl_contains(cclp, i)){
102357f1050SThomas Veerman 			register int start_char = i;
103357f1050SThomas Veerman 
104357f1050SThomas Veerman 			putc (' ', file);
105357f1050SThomas Veerman 
106357f1050SThomas Veerman 			fputs (readable_form (i), file);
107357f1050SThomas Veerman 
108357f1050SThomas Veerman 			while (++i < csize && ccl_contains(cclp,i)) ;
109357f1050SThomas Veerman 
110357f1050SThomas Veerman 			if (i - 1 > start_char)
111357f1050SThomas Veerman 				/* this was a run */
112357f1050SThomas Veerman 				fprintf (file, "-%s",
113357f1050SThomas Veerman 					 readable_form (i - 1));
114357f1050SThomas Veerman 
115357f1050SThomas Veerman 			putc (' ', file);
116357f1050SThomas Veerman 		}
117357f1050SThomas Veerman 	}
118357f1050SThomas Veerman 
119357f1050SThomas Veerman 	putc (']', file);
120357f1050SThomas Veerman }
121357f1050SThomas Veerman 
122357f1050SThomas Veerman 
123357f1050SThomas Veerman 
124357f1050SThomas Veerman /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
125357f1050SThomas Veerman int
ccl_set_diff(int a,int b)126357f1050SThomas Veerman ccl_set_diff (int a, int b)
127357f1050SThomas Veerman {
128357f1050SThomas Veerman     int  d, ch;
129357f1050SThomas Veerman 
130357f1050SThomas Veerman     /* create new class  */
131357f1050SThomas Veerman     d = cclinit();
132357f1050SThomas Veerman 
133357f1050SThomas Veerman     /* In order to handle negation, we spin through all possible chars,
134357f1050SThomas Veerman      * addding each char in a that is not in b.
135357f1050SThomas Veerman      * (This could be O(n^2), but n is small and bounded.)
136357f1050SThomas Veerman      */
137357f1050SThomas Veerman 	for ( ch = 0; ch < csize; ++ch )
138357f1050SThomas Veerman         if (ccl_contains (a, ch) && !ccl_contains(b, ch))
139357f1050SThomas Veerman             ccladd (d, ch);
140357f1050SThomas Veerman 
141357f1050SThomas Veerman     /* debug */
142357f1050SThomas Veerman     if (0){
143357f1050SThomas Veerman         fprintf(stderr, "ccl_set_diff (");
144357f1050SThomas Veerman             fprintf(stderr, "\n    ");
145357f1050SThomas Veerman             dump_cclp (stderr, a);
146357f1050SThomas Veerman             fprintf(stderr, "\n    ");
147357f1050SThomas Veerman             dump_cclp (stderr, b);
148357f1050SThomas Veerman             fprintf(stderr, "\n    ");
149357f1050SThomas Veerman             dump_cclp (stderr, d);
150357f1050SThomas Veerman         fprintf(stderr, "\n)\n");
151357f1050SThomas Veerman     }
152357f1050SThomas Veerman     return d;
153357f1050SThomas Veerman }
154357f1050SThomas Veerman 
155357f1050SThomas Veerman /* ccl_set_union - create a new ccl as the set union of the two given ccls. */
156357f1050SThomas Veerman int
ccl_set_union(int a,int b)157357f1050SThomas Veerman ccl_set_union (int a, int b)
158357f1050SThomas Veerman {
159357f1050SThomas Veerman     int  d, i;
160357f1050SThomas Veerman 
161357f1050SThomas Veerman     /* create new class  */
162357f1050SThomas Veerman     d = cclinit();
163357f1050SThomas Veerman 
164357f1050SThomas Veerman     /* Add all of a */
165357f1050SThomas Veerman     for (i = 0; i < ccllen[a]; ++i)
166357f1050SThomas Veerman 		ccladd (d, ccltbl[cclmap[a] + i]);
167357f1050SThomas Veerman 
168357f1050SThomas Veerman     /* Add all of b */
169357f1050SThomas Veerman     for (i = 0; i < ccllen[b]; ++i)
170357f1050SThomas Veerman 		ccladd (d, ccltbl[cclmap[b] + i]);
171357f1050SThomas Veerman 
172357f1050SThomas Veerman     /* debug */
173357f1050SThomas Veerman     if (0){
174357f1050SThomas Veerman         fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
175357f1050SThomas Veerman             fprintf(stderr, "\n    ");
176357f1050SThomas Veerman             dump_cclp (stderr, a);
177357f1050SThomas Veerman             fprintf(stderr, "\n    ");
178357f1050SThomas Veerman             dump_cclp (stderr, b);
179357f1050SThomas Veerman             fprintf(stderr, "\n    ");
180357f1050SThomas Veerman             dump_cclp (stderr, d);
181357f1050SThomas Veerman         fprintf(stderr, "\n)\n");
182357f1050SThomas Veerman     }
183357f1050SThomas Veerman     return d;
184357f1050SThomas Veerman }
185357f1050SThomas Veerman 
186357f1050SThomas Veerman 
187357f1050SThomas Veerman /* cclinit - return an empty ccl */
188357f1050SThomas Veerman 
cclinit()189357f1050SThomas Veerman int     cclinit ()
190357f1050SThomas Veerman {
191357f1050SThomas Veerman 	if (++lastccl >= current_maxccls) {
192357f1050SThomas Veerman 		current_maxccls += MAX_CCLS_INCREMENT;
193357f1050SThomas Veerman 
194357f1050SThomas Veerman 		++num_reallocs;
195357f1050SThomas Veerman 
196357f1050SThomas Veerman 		cclmap =
197357f1050SThomas Veerman 			reallocate_integer_array (cclmap, current_maxccls);
198357f1050SThomas Veerman 		ccllen =
199357f1050SThomas Veerman 			reallocate_integer_array (ccllen, current_maxccls);
200357f1050SThomas Veerman 		cclng = reallocate_integer_array (cclng, current_maxccls);
201357f1050SThomas Veerman 		ccl_has_nl =
202357f1050SThomas Veerman 			reallocate_bool_array (ccl_has_nl,
203357f1050SThomas Veerman 					       current_maxccls);
204357f1050SThomas Veerman 	}
205357f1050SThomas Veerman 
206357f1050SThomas Veerman 	if (lastccl == 1)
207357f1050SThomas Veerman 		/* we're making the first ccl */
208357f1050SThomas Veerman 		cclmap[lastccl] = 0;
209357f1050SThomas Veerman 
210357f1050SThomas Veerman 	else
211357f1050SThomas Veerman 		/* The new pointer is just past the end of the last ccl.
212357f1050SThomas Veerman 		 * Since the cclmap points to the \first/ character of a
213357f1050SThomas Veerman 		 * ccl, adding the length of the ccl to the cclmap pointer
214357f1050SThomas Veerman 		 * will produce a cursor to the first free space.
215357f1050SThomas Veerman 		 */
216357f1050SThomas Veerman 		cclmap[lastccl] =
217357f1050SThomas Veerman 			cclmap[lastccl - 1] + ccllen[lastccl - 1];
218357f1050SThomas Veerman 
219357f1050SThomas Veerman 	ccllen[lastccl] = 0;
220357f1050SThomas Veerman 	cclng[lastccl] = 0;	/* ccl's start out life un-negated */
221357f1050SThomas Veerman 	ccl_has_nl[lastccl] = false;
222357f1050SThomas Veerman 
223357f1050SThomas Veerman 	return lastccl;
224357f1050SThomas Veerman }
225357f1050SThomas Veerman 
226357f1050SThomas Veerman 
227357f1050SThomas Veerman /* cclnegate - negate the given ccl */
228357f1050SThomas Veerman 
cclnegate(cclp)229357f1050SThomas Veerman void    cclnegate (cclp)
230357f1050SThomas Veerman      int     cclp;
231357f1050SThomas Veerman {
232357f1050SThomas Veerman 	cclng[cclp] = 1;
233357f1050SThomas Veerman 	ccl_has_nl[cclp] = !ccl_has_nl[cclp];
234357f1050SThomas Veerman }
235357f1050SThomas Veerman 
236357f1050SThomas Veerman 
237357f1050SThomas Veerman /* list_character_set - list the members of a set of characters in CCL form
238357f1050SThomas Veerman  *
239357f1050SThomas Veerman  * Writes to the given file a character-class representation of those
240357f1050SThomas Veerman  * characters present in the given CCL.  A character is present if it
241357f1050SThomas Veerman  * has a non-zero value in the cset array.
242357f1050SThomas Veerman  */
243357f1050SThomas Veerman 
list_character_set(file,cset)244357f1050SThomas Veerman void    list_character_set (file, cset)
245357f1050SThomas Veerman      FILE   *file;
246357f1050SThomas Veerman      int     cset[];
247357f1050SThomas Veerman {
248357f1050SThomas Veerman 	register int i;
249357f1050SThomas Veerman 
250357f1050SThomas Veerman 	putc ('[', file);
251357f1050SThomas Veerman 
252357f1050SThomas Veerman 	for (i = 0; i < csize; ++i) {
253357f1050SThomas Veerman 		if (cset[i]) {
254357f1050SThomas Veerman 			register int start_char = i;
255357f1050SThomas Veerman 
256357f1050SThomas Veerman 			putc (' ', file);
257357f1050SThomas Veerman 
258357f1050SThomas Veerman 			fputs (readable_form (i), file);
259357f1050SThomas Veerman 
260357f1050SThomas Veerman 			while (++i < csize && cset[i]) ;
261357f1050SThomas Veerman 
262357f1050SThomas Veerman 			if (i - 1 > start_char)
263357f1050SThomas Veerman 				/* this was a run */
264357f1050SThomas Veerman 				fprintf (file, "-%s",
265357f1050SThomas Veerman 					 readable_form (i - 1));
266357f1050SThomas Veerman 
267357f1050SThomas Veerman 			putc (' ', file);
268357f1050SThomas Veerman 		}
269357f1050SThomas Veerman 	}
270357f1050SThomas Veerman 
271357f1050SThomas Veerman 	putc (']', file);
272357f1050SThomas Veerman }
273357f1050SThomas Veerman 
274357f1050SThomas Veerman /** Determines if the range [c1-c2] is unambiguous in a case-insensitive
275357f1050SThomas Veerman  * scanner.  Specifically, if a lowercase or uppercase character, x, is in the
276357f1050SThomas Veerman  * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
277357f1050SThomas Veerman  * be in the range. If not, then this range is ambiguous, and the function
278357f1050SThomas Veerman  * returns false.  For example, [@-_] spans [a-z] but not [A-Z].  Beware that
279357f1050SThomas Veerman  * [a-z] will be labeled ambiguous because it does not include [A-Z].
280357f1050SThomas Veerman  *
281357f1050SThomas Veerman  * @param c1 the lower end of the range
282357f1050SThomas Veerman  * @param c2 the upper end of the range
283357f1050SThomas Veerman  * @return true if [c1-c2] is not ambiguous for a caseless scanner.
284357f1050SThomas Veerman  */
range_covers_case(int c1,int c2)285357f1050SThomas Veerman bool range_covers_case (int c1, int c2)
286357f1050SThomas Veerman {
287357f1050SThomas Veerman 	int     i, o;
288357f1050SThomas Veerman 
289357f1050SThomas Veerman 	for (i = c1; i <= c2; i++) {
290357f1050SThomas Veerman 		if (has_case (i)) {
291357f1050SThomas Veerman 			o = reverse_case (i);
292357f1050SThomas Veerman 			if (o < c1 || c2 < o)
293357f1050SThomas Veerman 				return false;
294357f1050SThomas Veerman 		}
295357f1050SThomas Veerman 	}
296357f1050SThomas Veerman 	return true;
297357f1050SThomas Veerman }
298357f1050SThomas Veerman 
299357f1050SThomas Veerman /** Reverse the case of a character, if possible.
300357f1050SThomas Veerman  * @return c if case-reversal does not apply.
301357f1050SThomas Veerman  */
reverse_case(int c)302357f1050SThomas Veerman int reverse_case (int c)
303357f1050SThomas Veerman {
304357f1050SThomas Veerman 	return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);
305357f1050SThomas Veerman }
306357f1050SThomas Veerman 
307357f1050SThomas Veerman /** Return true if c is uppercase or lowercase. */
has_case(int c)308357f1050SThomas Veerman bool has_case (int c)
309357f1050SThomas Veerman {
310357f1050SThomas Veerman 	return (isupper (c) || islower (c)) ? true : false;
311357f1050SThomas Veerman }
312