xref: /netbsd-src/external/bsd/flex/dist/src/ccl.c (revision 7977e68669c3b784eb65c933a2bd99785638ae00)
130da1778Schristos /* ccl - routines for character classes */
230da1778Schristos 
330da1778Schristos /*  Copyright (c) 1990 The Regents of the University of California. */
430da1778Schristos /*  All rights reserved. */
530da1778Schristos 
630da1778Schristos /*  This code is derived from software contributed to Berkeley by */
730da1778Schristos /*  Vern Paxson. */
830da1778Schristos 
930da1778Schristos /*  The United States Government has rights in this work pursuant */
1030da1778Schristos /*  to contract no. DE-AC03-76SF00098 between the United States */
1130da1778Schristos  /*  Department of Energy and the University of California. */
1230da1778Schristos 
1330da1778Schristos /*  This file is part of flex. */
1430da1778Schristos 
1530da1778Schristos /*  Redistribution and use in source and binary forms, with or without */
1630da1778Schristos /*  modification, are permitted provided that the following conditions */
1730da1778Schristos /*  are met: */
1830da1778Schristos 
1930da1778Schristos /*  1. Redistributions of source code must retain the above copyright */
2030da1778Schristos /*     notice, this list of conditions and the following disclaimer. */
2130da1778Schristos /*  2. Redistributions in binary form must reproduce the above copyright */
2230da1778Schristos /*     notice, this list of conditions and the following disclaimer in the */
2330da1778Schristos /*     documentation and/or other materials provided with the distribution. */
2430da1778Schristos 
2530da1778Schristos /*  Neither the name of the University nor the names of its contributors */
2630da1778Schristos /*  may be used to endorse or promote products derived from this software */
2730da1778Schristos /*  without specific prior written permission. */
2830da1778Schristos 
2930da1778Schristos /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
3030da1778Schristos /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
3130da1778Schristos /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
3230da1778Schristos /*  PURPOSE. */
3330da1778Schristos #include "flexdef.h"
34*7977e686Schristos __RCSID("$NetBSD: ccl.c,v 1.3 2017/01/02 17:45:27 christos Exp $");
35a11deec2Schristos 
3630da1778Schristos 
3730da1778Schristos /* return true if the chr is in the ccl. Takes negation into account. */
3830da1778Schristos static bool
ccl_contains(const int cclp,const int ch)3930da1778Schristos ccl_contains (const int cclp, const int ch)
4030da1778Schristos {
4130da1778Schristos 	int     ind, len, i;
4230da1778Schristos 
4330da1778Schristos 	len = ccllen[cclp];
4430da1778Schristos 	ind = cclmap[cclp];
4530da1778Schristos 
4630da1778Schristos 	for (i = 0; i < len; ++i)
4730da1778Schristos 		if (ccltbl[ind + i] == ch)
4830da1778Schristos 			return !cclng[cclp];
4930da1778Schristos 
5030da1778Schristos     return cclng[cclp];
5130da1778Schristos }
5230da1778Schristos 
5330da1778Schristos 
5430da1778Schristos /* ccladd - add a single character to a ccl */
5530da1778Schristos 
ccladd(int cclp,int ch)56*7977e686Schristos void ccladd (int cclp, int ch)
5730da1778Schristos {
5830da1778Schristos 	int     ind, len, newpos, i;
5930da1778Schristos 
6030da1778Schristos 	check_char (ch);
6130da1778Schristos 
6230da1778Schristos 	len = ccllen[cclp];
6330da1778Schristos 	ind = cclmap[cclp];
6430da1778Schristos 
6530da1778Schristos 	/* check to see if the character is already in the ccl */
6630da1778Schristos 
6730da1778Schristos 	for (i = 0; i < len; ++i)
6830da1778Schristos 		if (ccltbl[ind + i] == ch)
6930da1778Schristos 			return;
7030da1778Schristos 
7130da1778Schristos 	/* mark newlines */
7230da1778Schristos 	if (ch == nlch)
7330da1778Schristos 		ccl_has_nl[cclp] = true;
7430da1778Schristos 
7530da1778Schristos 	newpos = ind + len;
7630da1778Schristos 
7730da1778Schristos 	if (newpos >= current_max_ccl_tbl_size) {
7830da1778Schristos 		current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
7930da1778Schristos 
8030da1778Schristos 		++num_reallocs;
8130da1778Schristos 
8230da1778Schristos 		ccltbl = reallocate_Character_array (ccltbl,
8330da1778Schristos 						     current_max_ccl_tbl_size);
8430da1778Schristos 	}
8530da1778Schristos 
8630da1778Schristos 	ccllen[cclp] = len + 1;
87*7977e686Schristos 	ccltbl[newpos] = (unsigned char) ch;
8830da1778Schristos }
8930da1778Schristos 
9030da1778Schristos /* dump_cclp - same thing as list_character_set, but for cclps.  */
9130da1778Schristos 
dump_cclp(FILE * file,int cclp)9230da1778Schristos static void    dump_cclp (FILE* file, int cclp)
9330da1778Schristos {
9430da1778Schristos 	int i;
9530da1778Schristos 
9630da1778Schristos 	putc ('[', file);
9730da1778Schristos 
9830da1778Schristos 	for (i = 0; i < csize; ++i) {
9930da1778Schristos 		if (ccl_contains(cclp, i)){
10030da1778Schristos 			int start_char = i;
10130da1778Schristos 
10230da1778Schristos 			putc (' ', file);
10330da1778Schristos 
10430da1778Schristos 			fputs (readable_form (i), file);
10530da1778Schristos 
10630da1778Schristos 			while (++i < csize && ccl_contains(cclp,i)) ;
10730da1778Schristos 
10830da1778Schristos 			if (i - 1 > start_char)
10930da1778Schristos 				/* this was a run */
11030da1778Schristos 				fprintf (file, "-%s",
11130da1778Schristos 					 readable_form (i - 1));
11230da1778Schristos 
11330da1778Schristos 			putc (' ', file);
11430da1778Schristos 		}
11530da1778Schristos 	}
11630da1778Schristos 
11730da1778Schristos 	putc (']', file);
11830da1778Schristos }
11930da1778Schristos 
12030da1778Schristos 
12130da1778Schristos 
12230da1778Schristos /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
12330da1778Schristos int
ccl_set_diff(int a,int b)12430da1778Schristos ccl_set_diff (int a, int b)
12530da1778Schristos {
12630da1778Schristos     int  d, ch;
12730da1778Schristos 
12830da1778Schristos     /* create new class  */
12930da1778Schristos     d = cclinit();
13030da1778Schristos 
13130da1778Schristos     /* In order to handle negation, we spin through all possible chars,
13230da1778Schristos      * addding each char in a that is not in b.
13330da1778Schristos      * (This could be O(n^2), but n is small and bounded.)
13430da1778Schristos      */
13530da1778Schristos 	for ( ch = 0; ch < csize; ++ch )
13630da1778Schristos         if (ccl_contains (a, ch) && !ccl_contains(b, ch))
13730da1778Schristos             ccladd (d, ch);
13830da1778Schristos 
13930da1778Schristos     /* debug */
14030da1778Schristos     if (0){
14130da1778Schristos         fprintf(stderr, "ccl_set_diff (");
14230da1778Schristos             fprintf(stderr, "\n    ");
14330da1778Schristos             dump_cclp (stderr, a);
14430da1778Schristos             fprintf(stderr, "\n    ");
14530da1778Schristos             dump_cclp (stderr, b);
14630da1778Schristos             fprintf(stderr, "\n    ");
14730da1778Schristos             dump_cclp (stderr, d);
14830da1778Schristos         fprintf(stderr, "\n)\n");
14930da1778Schristos     }
15030da1778Schristos     return d;
15130da1778Schristos }
15230da1778Schristos 
15330da1778Schristos /* ccl_set_union - create a new ccl as the set union of the two given ccls. */
15430da1778Schristos int
ccl_set_union(int a,int b)15530da1778Schristos ccl_set_union (int a, int b)
15630da1778Schristos {
15730da1778Schristos     int  d, i;
15830da1778Schristos 
15930da1778Schristos     /* create new class  */
16030da1778Schristos     d = cclinit();
16130da1778Schristos 
16230da1778Schristos     /* Add all of a */
16330da1778Schristos     for (i = 0; i < ccllen[a]; ++i)
16430da1778Schristos 		ccladd (d, ccltbl[cclmap[a] + i]);
16530da1778Schristos 
16630da1778Schristos     /* Add all of b */
16730da1778Schristos     for (i = 0; i < ccllen[b]; ++i)
16830da1778Schristos 		ccladd (d, ccltbl[cclmap[b] + i]);
16930da1778Schristos 
17030da1778Schristos     /* debug */
17130da1778Schristos     if (0){
17230da1778Schristos         fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
17330da1778Schristos             fprintf(stderr, "\n    ");
17430da1778Schristos             dump_cclp (stderr, a);
17530da1778Schristos             fprintf(stderr, "\n    ");
17630da1778Schristos             dump_cclp (stderr, b);
17730da1778Schristos             fprintf(stderr, "\n    ");
17830da1778Schristos             dump_cclp (stderr, d);
17930da1778Schristos         fprintf(stderr, "\n)\n");
18030da1778Schristos     }
18130da1778Schristos     return d;
18230da1778Schristos }
18330da1778Schristos 
18430da1778Schristos 
18530da1778Schristos /* cclinit - return an empty ccl */
18630da1778Schristos 
cclinit(void)187*7977e686Schristos int     cclinit (void)
18830da1778Schristos {
18930da1778Schristos 	if (++lastccl >= current_maxccls) {
19030da1778Schristos 		current_maxccls += MAX_CCLS_INCREMENT;
19130da1778Schristos 
19230da1778Schristos 		++num_reallocs;
19330da1778Schristos 
19430da1778Schristos 		cclmap =
19530da1778Schristos 			reallocate_integer_array (cclmap, current_maxccls);
19630da1778Schristos 		ccllen =
19730da1778Schristos 			reallocate_integer_array (ccllen, current_maxccls);
19830da1778Schristos 		cclng = reallocate_integer_array (cclng, current_maxccls);
19930da1778Schristos 		ccl_has_nl =
20030da1778Schristos 			reallocate_bool_array (ccl_has_nl,
20130da1778Schristos 					       current_maxccls);
20230da1778Schristos 	}
20330da1778Schristos 
20430da1778Schristos 	if (lastccl == 1)
20530da1778Schristos 		/* we're making the first ccl */
20630da1778Schristos 		cclmap[lastccl] = 0;
20730da1778Schristos 
20830da1778Schristos 	else
20930da1778Schristos 		/* The new pointer is just past the end of the last ccl.
21030da1778Schristos 		 * Since the cclmap points to the \first/ character of a
21130da1778Schristos 		 * ccl, adding the length of the ccl to the cclmap pointer
21230da1778Schristos 		 * will produce a cursor to the first free space.
21330da1778Schristos 		 */
21430da1778Schristos 		cclmap[lastccl] =
21530da1778Schristos 			cclmap[lastccl - 1] + ccllen[lastccl - 1];
21630da1778Schristos 
21730da1778Schristos 	ccllen[lastccl] = 0;
21830da1778Schristos 	cclng[lastccl] = 0;	/* ccl's start out life un-negated */
21930da1778Schristos 	ccl_has_nl[lastccl] = false;
22030da1778Schristos 
22130da1778Schristos 	return lastccl;
22230da1778Schristos }
22330da1778Schristos 
22430da1778Schristos 
22530da1778Schristos /* cclnegate - negate the given ccl */
22630da1778Schristos 
cclnegate(int cclp)227*7977e686Schristos void    cclnegate (int cclp)
22830da1778Schristos {
22930da1778Schristos 	cclng[cclp] = 1;
23030da1778Schristos 	ccl_has_nl[cclp] = !ccl_has_nl[cclp];
23130da1778Schristos }
23230da1778Schristos 
23330da1778Schristos 
23430da1778Schristos /* list_character_set - list the members of a set of characters in CCL form
23530da1778Schristos  *
23630da1778Schristos  * Writes to the given file a character-class representation of those
23730da1778Schristos  * characters present in the given CCL.  A character is present if it
23830da1778Schristos  * has a non-zero value in the cset array.
23930da1778Schristos  */
24030da1778Schristos 
list_character_set(FILE * file,int cset[])241*7977e686Schristos void    list_character_set (FILE *file, int cset[])
24230da1778Schristos {
24330da1778Schristos 	int i;
24430da1778Schristos 
24530da1778Schristos 	putc ('[', file);
24630da1778Schristos 
24730da1778Schristos 	for (i = 0; i < csize; ++i) {
24830da1778Schristos 		if (cset[i]) {
24930da1778Schristos 			int start_char = i;
25030da1778Schristos 
25130da1778Schristos 			putc (' ', file);
25230da1778Schristos 
25330da1778Schristos 			fputs (readable_form (i), file);
25430da1778Schristos 
25530da1778Schristos 			while (++i < csize && cset[i]) ;
25630da1778Schristos 
25730da1778Schristos 			if (i - 1 > start_char)
25830da1778Schristos 				/* this was a run */
25930da1778Schristos 				fprintf (file, "-%s",
26030da1778Schristos 					 readable_form (i - 1));
26130da1778Schristos 
26230da1778Schristos 			putc (' ', file);
26330da1778Schristos 		}
26430da1778Schristos 	}
26530da1778Schristos 
26630da1778Schristos 	putc (']', file);
26730da1778Schristos }
26830da1778Schristos 
26930da1778Schristos /** Determines if the range [c1-c2] is unambiguous in a case-insensitive
27030da1778Schristos  * scanner.  Specifically, if a lowercase or uppercase character, x, is in the
27130da1778Schristos  * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
27230da1778Schristos  * be in the range. If not, then this range is ambiguous, and the function
27330da1778Schristos  * returns false.  For example, [@-_] spans [a-z] but not [A-Z].  Beware that
27430da1778Schristos  * [a-z] will be labeled ambiguous because it does not include [A-Z].
27530da1778Schristos  *
27630da1778Schristos  * @param c1 the lower end of the range
27730da1778Schristos  * @param c2 the upper end of the range
27830da1778Schristos  * @return true if [c1-c2] is not ambiguous for a caseless scanner.
27930da1778Schristos  */
range_covers_case(int c1,int c2)28030da1778Schristos bool range_covers_case (int c1, int c2)
28130da1778Schristos {
28230da1778Schristos 	int     i, o;
28330da1778Schristos 
28430da1778Schristos 	for (i = c1; i <= c2; i++) {
28530da1778Schristos 		if (has_case (i)) {
28630da1778Schristos 			o = reverse_case (i);
28730da1778Schristos 			if (o < c1 || c2 < o)
28830da1778Schristos 				return false;
28930da1778Schristos 		}
29030da1778Schristos 	}
29130da1778Schristos 	return true;
29230da1778Schristos }
29330da1778Schristos 
29430da1778Schristos /** Reverse the case of a character, if possible.
29530da1778Schristos  * @return c if case-reversal does not apply.
29630da1778Schristos  */
reverse_case(int c)29730da1778Schristos int reverse_case (int c)
29830da1778Schristos {
29930da1778Schristos 	return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);
30030da1778Schristos }
30130da1778Schristos 
30230da1778Schristos /** Return true if c is uppercase or lowercase. */
has_case(int c)30330da1778Schristos bool has_case (int c)
30430da1778Schristos {
30530da1778Schristos 	return (isupper (c) || islower (c)) ? true : false;
30630da1778Schristos }
307