130da1778Schristos /* ccl - routines for character classes */
230da1778Schristos
330da1778Schristos /* Copyright (c) 1990 The Regents of the University of California. */
430da1778Schristos /* All rights reserved. */
530da1778Schristos
630da1778Schristos /* This code is derived from software contributed to Berkeley by */
730da1778Schristos /* Vern Paxson. */
830da1778Schristos
930da1778Schristos /* The United States Government has rights in this work pursuant */
1030da1778Schristos /* to contract no. DE-AC03-76SF00098 between the United States */
1130da1778Schristos /* Department of Energy and the University of California. */
1230da1778Schristos
1330da1778Schristos /* This file is part of flex. */
1430da1778Schristos
1530da1778Schristos /* Redistribution and use in source and binary forms, with or without */
1630da1778Schristos /* modification, are permitted provided that the following conditions */
1730da1778Schristos /* are met: */
1830da1778Schristos
1930da1778Schristos /* 1. Redistributions of source code must retain the above copyright */
2030da1778Schristos /* notice, this list of conditions and the following disclaimer. */
2130da1778Schristos /* 2. Redistributions in binary form must reproduce the above copyright */
2230da1778Schristos /* notice, this list of conditions and the following disclaimer in the */
2330da1778Schristos /* documentation and/or other materials provided with the distribution. */
2430da1778Schristos
2530da1778Schristos /* Neither the name of the University nor the names of its contributors */
2630da1778Schristos /* may be used to endorse or promote products derived from this software */
2730da1778Schristos /* without specific prior written permission. */
2830da1778Schristos
2930da1778Schristos /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
3030da1778Schristos /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
3130da1778Schristos /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
3230da1778Schristos /* PURPOSE. */
3330da1778Schristos #include "flexdef.h"
34*7977e686Schristos __RCSID("$NetBSD: ccl.c,v 1.3 2017/01/02 17:45:27 christos Exp $");
35a11deec2Schristos
3630da1778Schristos
3730da1778Schristos /* return true if the chr is in the ccl. Takes negation into account. */
3830da1778Schristos static bool
ccl_contains(const int cclp,const int ch)3930da1778Schristos ccl_contains (const int cclp, const int ch)
4030da1778Schristos {
4130da1778Schristos int ind, len, i;
4230da1778Schristos
4330da1778Schristos len = ccllen[cclp];
4430da1778Schristos ind = cclmap[cclp];
4530da1778Schristos
4630da1778Schristos for (i = 0; i < len; ++i)
4730da1778Schristos if (ccltbl[ind + i] == ch)
4830da1778Schristos return !cclng[cclp];
4930da1778Schristos
5030da1778Schristos return cclng[cclp];
5130da1778Schristos }
5230da1778Schristos
5330da1778Schristos
5430da1778Schristos /* ccladd - add a single character to a ccl */
5530da1778Schristos
ccladd(int cclp,int ch)56*7977e686Schristos void ccladd (int cclp, int ch)
5730da1778Schristos {
5830da1778Schristos int ind, len, newpos, i;
5930da1778Schristos
6030da1778Schristos check_char (ch);
6130da1778Schristos
6230da1778Schristos len = ccllen[cclp];
6330da1778Schristos ind = cclmap[cclp];
6430da1778Schristos
6530da1778Schristos /* check to see if the character is already in the ccl */
6630da1778Schristos
6730da1778Schristos for (i = 0; i < len; ++i)
6830da1778Schristos if (ccltbl[ind + i] == ch)
6930da1778Schristos return;
7030da1778Schristos
7130da1778Schristos /* mark newlines */
7230da1778Schristos if (ch == nlch)
7330da1778Schristos ccl_has_nl[cclp] = true;
7430da1778Schristos
7530da1778Schristos newpos = ind + len;
7630da1778Schristos
7730da1778Schristos if (newpos >= current_max_ccl_tbl_size) {
7830da1778Schristos current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
7930da1778Schristos
8030da1778Schristos ++num_reallocs;
8130da1778Schristos
8230da1778Schristos ccltbl = reallocate_Character_array (ccltbl,
8330da1778Schristos current_max_ccl_tbl_size);
8430da1778Schristos }
8530da1778Schristos
8630da1778Schristos ccllen[cclp] = len + 1;
87*7977e686Schristos ccltbl[newpos] = (unsigned char) ch;
8830da1778Schristos }
8930da1778Schristos
9030da1778Schristos /* dump_cclp - same thing as list_character_set, but for cclps. */
9130da1778Schristos
dump_cclp(FILE * file,int cclp)9230da1778Schristos static void dump_cclp (FILE* file, int cclp)
9330da1778Schristos {
9430da1778Schristos int i;
9530da1778Schristos
9630da1778Schristos putc ('[', file);
9730da1778Schristos
9830da1778Schristos for (i = 0; i < csize; ++i) {
9930da1778Schristos if (ccl_contains(cclp, i)){
10030da1778Schristos int start_char = i;
10130da1778Schristos
10230da1778Schristos putc (' ', file);
10330da1778Schristos
10430da1778Schristos fputs (readable_form (i), file);
10530da1778Schristos
10630da1778Schristos while (++i < csize && ccl_contains(cclp,i)) ;
10730da1778Schristos
10830da1778Schristos if (i - 1 > start_char)
10930da1778Schristos /* this was a run */
11030da1778Schristos fprintf (file, "-%s",
11130da1778Schristos readable_form (i - 1));
11230da1778Schristos
11330da1778Schristos putc (' ', file);
11430da1778Schristos }
11530da1778Schristos }
11630da1778Schristos
11730da1778Schristos putc (']', file);
11830da1778Schristos }
11930da1778Schristos
12030da1778Schristos
12130da1778Schristos
12230da1778Schristos /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
12330da1778Schristos int
ccl_set_diff(int a,int b)12430da1778Schristos ccl_set_diff (int a, int b)
12530da1778Schristos {
12630da1778Schristos int d, ch;
12730da1778Schristos
12830da1778Schristos /* create new class */
12930da1778Schristos d = cclinit();
13030da1778Schristos
13130da1778Schristos /* In order to handle negation, we spin through all possible chars,
13230da1778Schristos * addding each char in a that is not in b.
13330da1778Schristos * (This could be O(n^2), but n is small and bounded.)
13430da1778Schristos */
13530da1778Schristos for ( ch = 0; ch < csize; ++ch )
13630da1778Schristos if (ccl_contains (a, ch) && !ccl_contains(b, ch))
13730da1778Schristos ccladd (d, ch);
13830da1778Schristos
13930da1778Schristos /* debug */
14030da1778Schristos if (0){
14130da1778Schristos fprintf(stderr, "ccl_set_diff (");
14230da1778Schristos fprintf(stderr, "\n ");
14330da1778Schristos dump_cclp (stderr, a);
14430da1778Schristos fprintf(stderr, "\n ");
14530da1778Schristos dump_cclp (stderr, b);
14630da1778Schristos fprintf(stderr, "\n ");
14730da1778Schristos dump_cclp (stderr, d);
14830da1778Schristos fprintf(stderr, "\n)\n");
14930da1778Schristos }
15030da1778Schristos return d;
15130da1778Schristos }
15230da1778Schristos
15330da1778Schristos /* ccl_set_union - create a new ccl as the set union of the two given ccls. */
15430da1778Schristos int
ccl_set_union(int a,int b)15530da1778Schristos ccl_set_union (int a, int b)
15630da1778Schristos {
15730da1778Schristos int d, i;
15830da1778Schristos
15930da1778Schristos /* create new class */
16030da1778Schristos d = cclinit();
16130da1778Schristos
16230da1778Schristos /* Add all of a */
16330da1778Schristos for (i = 0; i < ccllen[a]; ++i)
16430da1778Schristos ccladd (d, ccltbl[cclmap[a] + i]);
16530da1778Schristos
16630da1778Schristos /* Add all of b */
16730da1778Schristos for (i = 0; i < ccllen[b]; ++i)
16830da1778Schristos ccladd (d, ccltbl[cclmap[b] + i]);
16930da1778Schristos
17030da1778Schristos /* debug */
17130da1778Schristos if (0){
17230da1778Schristos fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
17330da1778Schristos fprintf(stderr, "\n ");
17430da1778Schristos dump_cclp (stderr, a);
17530da1778Schristos fprintf(stderr, "\n ");
17630da1778Schristos dump_cclp (stderr, b);
17730da1778Schristos fprintf(stderr, "\n ");
17830da1778Schristos dump_cclp (stderr, d);
17930da1778Schristos fprintf(stderr, "\n)\n");
18030da1778Schristos }
18130da1778Schristos return d;
18230da1778Schristos }
18330da1778Schristos
18430da1778Schristos
18530da1778Schristos /* cclinit - return an empty ccl */
18630da1778Schristos
cclinit(void)187*7977e686Schristos int cclinit (void)
18830da1778Schristos {
18930da1778Schristos if (++lastccl >= current_maxccls) {
19030da1778Schristos current_maxccls += MAX_CCLS_INCREMENT;
19130da1778Schristos
19230da1778Schristos ++num_reallocs;
19330da1778Schristos
19430da1778Schristos cclmap =
19530da1778Schristos reallocate_integer_array (cclmap, current_maxccls);
19630da1778Schristos ccllen =
19730da1778Schristos reallocate_integer_array (ccllen, current_maxccls);
19830da1778Schristos cclng = reallocate_integer_array (cclng, current_maxccls);
19930da1778Schristos ccl_has_nl =
20030da1778Schristos reallocate_bool_array (ccl_has_nl,
20130da1778Schristos current_maxccls);
20230da1778Schristos }
20330da1778Schristos
20430da1778Schristos if (lastccl == 1)
20530da1778Schristos /* we're making the first ccl */
20630da1778Schristos cclmap[lastccl] = 0;
20730da1778Schristos
20830da1778Schristos else
20930da1778Schristos /* The new pointer is just past the end of the last ccl.
21030da1778Schristos * Since the cclmap points to the \first/ character of a
21130da1778Schristos * ccl, adding the length of the ccl to the cclmap pointer
21230da1778Schristos * will produce a cursor to the first free space.
21330da1778Schristos */
21430da1778Schristos cclmap[lastccl] =
21530da1778Schristos cclmap[lastccl - 1] + ccllen[lastccl - 1];
21630da1778Schristos
21730da1778Schristos ccllen[lastccl] = 0;
21830da1778Schristos cclng[lastccl] = 0; /* ccl's start out life un-negated */
21930da1778Schristos ccl_has_nl[lastccl] = false;
22030da1778Schristos
22130da1778Schristos return lastccl;
22230da1778Schristos }
22330da1778Schristos
22430da1778Schristos
22530da1778Schristos /* cclnegate - negate the given ccl */
22630da1778Schristos
cclnegate(int cclp)227*7977e686Schristos void cclnegate (int cclp)
22830da1778Schristos {
22930da1778Schristos cclng[cclp] = 1;
23030da1778Schristos ccl_has_nl[cclp] = !ccl_has_nl[cclp];
23130da1778Schristos }
23230da1778Schristos
23330da1778Schristos
23430da1778Schristos /* list_character_set - list the members of a set of characters in CCL form
23530da1778Schristos *
23630da1778Schristos * Writes to the given file a character-class representation of those
23730da1778Schristos * characters present in the given CCL. A character is present if it
23830da1778Schristos * has a non-zero value in the cset array.
23930da1778Schristos */
24030da1778Schristos
list_character_set(FILE * file,int cset[])241*7977e686Schristos void list_character_set (FILE *file, int cset[])
24230da1778Schristos {
24330da1778Schristos int i;
24430da1778Schristos
24530da1778Schristos putc ('[', file);
24630da1778Schristos
24730da1778Schristos for (i = 0; i < csize; ++i) {
24830da1778Schristos if (cset[i]) {
24930da1778Schristos int start_char = i;
25030da1778Schristos
25130da1778Schristos putc (' ', file);
25230da1778Schristos
25330da1778Schristos fputs (readable_form (i), file);
25430da1778Schristos
25530da1778Schristos while (++i < csize && cset[i]) ;
25630da1778Schristos
25730da1778Schristos if (i - 1 > start_char)
25830da1778Schristos /* this was a run */
25930da1778Schristos fprintf (file, "-%s",
26030da1778Schristos readable_form (i - 1));
26130da1778Schristos
26230da1778Schristos putc (' ', file);
26330da1778Schristos }
26430da1778Schristos }
26530da1778Schristos
26630da1778Schristos putc (']', file);
26730da1778Schristos }
26830da1778Schristos
26930da1778Schristos /** Determines if the range [c1-c2] is unambiguous in a case-insensitive
27030da1778Schristos * scanner. Specifically, if a lowercase or uppercase character, x, is in the
27130da1778Schristos * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
27230da1778Schristos * be in the range. If not, then this range is ambiguous, and the function
27330da1778Schristos * returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that
27430da1778Schristos * [a-z] will be labeled ambiguous because it does not include [A-Z].
27530da1778Schristos *
27630da1778Schristos * @param c1 the lower end of the range
27730da1778Schristos * @param c2 the upper end of the range
27830da1778Schristos * @return true if [c1-c2] is not ambiguous for a caseless scanner.
27930da1778Schristos */
range_covers_case(int c1,int c2)28030da1778Schristos bool range_covers_case (int c1, int c2)
28130da1778Schristos {
28230da1778Schristos int i, o;
28330da1778Schristos
28430da1778Schristos for (i = c1; i <= c2; i++) {
28530da1778Schristos if (has_case (i)) {
28630da1778Schristos o = reverse_case (i);
28730da1778Schristos if (o < c1 || c2 < o)
28830da1778Schristos return false;
28930da1778Schristos }
29030da1778Schristos }
29130da1778Schristos return true;
29230da1778Schristos }
29330da1778Schristos
29430da1778Schristos /** Reverse the case of a character, if possible.
29530da1778Schristos * @return c if case-reversal does not apply.
29630da1778Schristos */
reverse_case(int c)29730da1778Schristos int reverse_case (int c)
29830da1778Schristos {
29930da1778Schristos return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);
30030da1778Schristos }
30130da1778Schristos
30230da1778Schristos /** Return true if c is uppercase or lowercase. */
has_case(int c)30330da1778Schristos bool has_case (int c)
30430da1778Schristos {
30530da1778Schristos return (isupper (c) || islower (c)) ? true : false;
30630da1778Schristos }
307