1 /* $NetBSD: ccl.c,v 1.1.1.1 2009/10/26 00:25:06 christos Exp $ */ 2 3 /* ccl - routines for character classes */ 4 5 /* Copyright (c) 1990 The Regents of the University of California. */ 6 /* All rights reserved. */ 7 8 /* This code is derived from software contributed to Berkeley by */ 9 /* Vern Paxson. */ 10 11 /* The United States Government has rights in this work pursuant */ 12 /* to contract no. DE-AC03-76SF00098 between the United States */ 13 /* Department of Energy and the University of California. */ 14 15 /* This file is part of flex. */ 16 17 /* Redistribution and use in source and binary forms, with or without */ 18 /* modification, are permitted provided that the following conditions */ 19 /* are met: */ 20 21 /* 1. Redistributions of source code must retain the above copyright */ 22 /* notice, this list of conditions and the following disclaimer. */ 23 /* 2. Redistributions in binary form must reproduce the above copyright */ 24 /* notice, this list of conditions and the following disclaimer in the */ 25 /* documentation and/or other materials provided with the distribution. */ 26 27 /* Neither the name of the University nor the names of its contributors */ 28 /* may be used to endorse or promote products derived from this software */ 29 /* without specific prior written permission. */ 30 31 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ 32 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ 33 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ 34 /* PURPOSE. */ 35 36 #include "flexdef.h" 37 38 /* return true if the chr is in the ccl. Takes negation into account. */ 39 static bool 40 ccl_contains (const int cclp, const int ch) 41 { 42 int ind, len, i; 43 44 len = ccllen[cclp]; 45 ind = cclmap[cclp]; 46 47 for (i = 0; i < len; ++i) 48 if (ccltbl[ind + i] == ch) 49 return !cclng[cclp]; 50 51 return cclng[cclp]; 52 } 53 54 55 /* ccladd - add a single character to a ccl */ 56 57 void ccladd (cclp, ch) 58 int cclp; 59 int ch; 60 { 61 int ind, len, newpos, i; 62 63 check_char (ch); 64 65 len = ccllen[cclp]; 66 ind = cclmap[cclp]; 67 68 /* check to see if the character is already in the ccl */ 69 70 for (i = 0; i < len; ++i) 71 if (ccltbl[ind + i] == ch) 72 return; 73 74 /* mark newlines */ 75 if (ch == nlch) 76 ccl_has_nl[cclp] = true; 77 78 newpos = ind + len; 79 80 if (newpos >= current_max_ccl_tbl_size) { 81 current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT; 82 83 ++num_reallocs; 84 85 ccltbl = reallocate_Character_array (ccltbl, 86 current_max_ccl_tbl_size); 87 } 88 89 ccllen[cclp] = len + 1; 90 ccltbl[newpos] = ch; 91 } 92 93 /* dump_cclp - same thing as list_character_set, but for cclps. */ 94 95 static void dump_cclp (FILE* file, int cclp) 96 { 97 register int i; 98 99 putc ('[', file); 100 101 for (i = 0; i < csize; ++i) { 102 if (ccl_contains(cclp, i)){ 103 register int start_char = i; 104 105 putc (' ', file); 106 107 fputs (readable_form (i), file); 108 109 while (++i < csize && ccl_contains(cclp,i)) ; 110 111 if (i - 1 > start_char) 112 /* this was a run */ 113 fprintf (file, "-%s", 114 readable_form (i - 1)); 115 116 putc (' ', file); 117 } 118 } 119 120 putc (']', file); 121 } 122 123 124 125 /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */ 126 int 127 ccl_set_diff (int a, int b) 128 { 129 int d, ch; 130 131 /* create new class */ 132 d = cclinit(); 133 134 /* In order to handle negation, we spin through all possible chars, 135 * addding each char in a that is not in b. 136 * (This could be O(n^2), but n is small and bounded.) 137 */ 138 for ( ch = 0; ch < csize; ++ch ) 139 if (ccl_contains (a, ch) && !ccl_contains(b, ch)) 140 ccladd (d, ch); 141 142 /* debug */ 143 if (0){ 144 fprintf(stderr, "ccl_set_diff ("); 145 fprintf(stderr, "\n "); 146 dump_cclp (stderr, a); 147 fprintf(stderr, "\n "); 148 dump_cclp (stderr, b); 149 fprintf(stderr, "\n "); 150 dump_cclp (stderr, d); 151 fprintf(stderr, "\n)\n"); 152 } 153 return d; 154 } 155 156 /* ccl_set_union - create a new ccl as the set union of the two given ccls. */ 157 int 158 ccl_set_union (int a, int b) 159 { 160 int d, i; 161 162 /* create new class */ 163 d = cclinit(); 164 165 /* Add all of a */ 166 for (i = 0; i < ccllen[a]; ++i) 167 ccladd (d, ccltbl[cclmap[a] + i]); 168 169 /* Add all of b */ 170 for (i = 0; i < ccllen[b]; ++i) 171 ccladd (d, ccltbl[cclmap[b] + i]); 172 173 /* debug */ 174 if (0){ 175 fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d); 176 fprintf(stderr, "\n "); 177 dump_cclp (stderr, a); 178 fprintf(stderr, "\n "); 179 dump_cclp (stderr, b); 180 fprintf(stderr, "\n "); 181 dump_cclp (stderr, d); 182 fprintf(stderr, "\n)\n"); 183 } 184 return d; 185 } 186 187 188 /* cclinit - return an empty ccl */ 189 190 int cclinit () 191 { 192 if (++lastccl >= current_maxccls) { 193 current_maxccls += MAX_CCLS_INCREMENT; 194 195 ++num_reallocs; 196 197 cclmap = 198 reallocate_integer_array (cclmap, current_maxccls); 199 ccllen = 200 reallocate_integer_array (ccllen, current_maxccls); 201 cclng = reallocate_integer_array (cclng, current_maxccls); 202 ccl_has_nl = 203 reallocate_bool_array (ccl_has_nl, 204 current_maxccls); 205 } 206 207 if (lastccl == 1) 208 /* we're making the first ccl */ 209 cclmap[lastccl] = 0; 210 211 else 212 /* The new pointer is just past the end of the last ccl. 213 * Since the cclmap points to the \first/ character of a 214 * ccl, adding the length of the ccl to the cclmap pointer 215 * will produce a cursor to the first free space. 216 */ 217 cclmap[lastccl] = 218 cclmap[lastccl - 1] + ccllen[lastccl - 1]; 219 220 ccllen[lastccl] = 0; 221 cclng[lastccl] = 0; /* ccl's start out life un-negated */ 222 ccl_has_nl[lastccl] = false; 223 224 return lastccl; 225 } 226 227 228 /* cclnegate - negate the given ccl */ 229 230 void cclnegate (cclp) 231 int cclp; 232 { 233 cclng[cclp] = 1; 234 ccl_has_nl[cclp] = !ccl_has_nl[cclp]; 235 } 236 237 238 /* list_character_set - list the members of a set of characters in CCL form 239 * 240 * Writes to the given file a character-class representation of those 241 * characters present in the given CCL. A character is present if it 242 * has a non-zero value in the cset array. 243 */ 244 245 void list_character_set (file, cset) 246 FILE *file; 247 int cset[]; 248 { 249 register int i; 250 251 putc ('[', file); 252 253 for (i = 0; i < csize; ++i) { 254 if (cset[i]) { 255 register int start_char = i; 256 257 putc (' ', file); 258 259 fputs (readable_form (i), file); 260 261 while (++i < csize && cset[i]) ; 262 263 if (i - 1 > start_char) 264 /* this was a run */ 265 fprintf (file, "-%s", 266 readable_form (i - 1)); 267 268 putc (' ', file); 269 } 270 } 271 272 putc (']', file); 273 } 274 275 /** Determines if the range [c1-c2] is unambiguous in a case-insensitive 276 * scanner. Specifically, if a lowercase or uppercase character, x, is in the 277 * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also 278 * be in the range. If not, then this range is ambiguous, and the function 279 * returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that 280 * [a-z] will be labeled ambiguous because it does not include [A-Z]. 281 * 282 * @param c1 the lower end of the range 283 * @param c2 the upper end of the range 284 * @return true if [c1-c2] is not ambiguous for a caseless scanner. 285 */ 286 bool range_covers_case (int c1, int c2) 287 { 288 int i, o; 289 290 for (i = c1; i <= c2; i++) { 291 if (has_case (i)) { 292 o = reverse_case (i); 293 if (o < c1 || c2 < o) 294 return false; 295 } 296 } 297 return true; 298 } 299 300 /** Reverse the case of a character, if possible. 301 * @return c if case-reversal does not apply. 302 */ 303 int reverse_case (int c) 304 { 305 return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c); 306 } 307 308 /** Return true if c is uppercase or lowercase. */ 309 bool has_case (int c) 310 { 311 return (isupper (c) || islower (c)) ? true : false; 312 } 313