1 /* $NetBSD: cook.c,v 1.4 2016/01/08 21:35:41 christos Exp $ */ 2 3 /** 4 * \file cook.c 5 * 6 * This file contains the routines that deal with processing quoted strings 7 * into an internal format. 8 * 9 * @addtogroup autoopts 10 * @{ 11 */ 12 /* 13 * This file is part of AutoOpts, a companion to AutoGen. 14 * AutoOpts is free software. 15 * AutoOpts is Copyright (C) 1992-2015 by Bruce Korb - all rights reserved 16 * 17 * AutoOpts is available under any one of two licenses. The license 18 * in use must be one of these two and the choice is under the control 19 * of the user of the license. 20 * 21 * The GNU Lesser General Public License, version 3 or later 22 * See the files "COPYING.lgplv3" and "COPYING.gplv3" 23 * 24 * The Modified Berkeley Software Distribution License 25 * See the file "COPYING.mbsd" 26 * 27 * These files have the following sha256 sums: 28 * 29 * 8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95 COPYING.gplv3 30 * 4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b COPYING.lgplv3 31 * 13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239 COPYING.mbsd 32 */ 33 34 /* = = = START-STATIC-FORWARD = = = */ 35 static bool 36 contiguous_quote(char ** pps, char * pq, int * lnct_p); 37 /* = = = END-STATIC-FORWARD = = = */ 38 39 /*=export_func ao_string_cook_escape_char 40 * private: 41 * 42 * what: escape-process a string fragment 43 * arg: + char const * + pzScan + points to character after the escape + 44 * arg: + char * + pRes + Where to put the result byte + 45 * arg: + unsigned int + nl_ch + replacement char if scanned char is \n + 46 * 47 * ret-type: unsigned int 48 * ret-desc: The number of bytes consumed processing the escaped character. 49 * 50 * doc: 51 * 52 * This function converts "t" into "\t" and all your other favorite 53 * escapes, including numeric ones: hex and ocatal, too. 54 * The returned result tells the caller how far to advance the 55 * scan pointer (passed in). The default is to just pass through the 56 * escaped character and advance the scan by one. 57 * 58 * Some applications need to keep an escaped newline, others need to 59 * suppress it. This is accomplished by supplying a '\n' replacement 60 * character that is different from \n, if need be. For example, use 61 * 0x7F and never emit a 0x7F. 62 * 63 * err: @code{NULL} is returned if the string is mal-formed. 64 =*/ 65 unsigned int 66 ao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl) 67 { 68 unsigned int res = 1; 69 70 switch (*pRes = *pzIn++) { 71 case NUL: /* NUL - end of input string */ 72 return 0; 73 case '\r': 74 if (*pzIn != NL) 75 return 1; 76 res++; 77 /* FALLTHROUGH */ 78 case NL: /* NL - emit newline */ 79 *pRes = (char)nl; 80 return res; 81 82 case 'a': *pRes = '\a'; break; 83 case 'b': *pRes = '\b'; break; 84 case 'f': *pRes = '\f'; break; 85 case 'n': *pRes = NL; break; 86 case 'r': *pRes = '\r'; break; 87 case 't': *pRes = '\t'; break; 88 case 'v': *pRes = '\v'; break; 89 90 case 'x': 91 case 'X': /* HEX Escape */ 92 if (IS_HEX_DIGIT_CHAR(*pzIn)) { 93 char z[4]; 94 unsigned int ct = 0; 95 96 do { 97 z[ct] = pzIn[ct]; 98 if (++ct >= 2) 99 break; 100 } while (IS_HEX_DIGIT_CHAR(pzIn[ct])); 101 z[ct] = NUL; 102 *pRes = (char)strtoul(z, NULL, 16); 103 return ct + 1; 104 } 105 break; 106 107 case '0': case '1': case '2': case '3': 108 case '4': case '5': case '6': case '7': 109 { 110 /* 111 * IF the character copied was an octal digit, 112 * THEN set the output character to an octal value. 113 * The 3 octal digit result might exceed 0xFF, so check it. 114 */ 115 char z[4]; 116 unsigned long val; 117 unsigned int ct = 0; 118 119 z[ct++] = *--pzIn; 120 while (IS_OCT_DIGIT_CHAR(pzIn[ct])) { 121 z[ct] = pzIn[ct]; 122 if (++ct >= 3) 123 break; 124 } 125 126 z[ct] = NUL; 127 val = strtoul(z, NULL, 8); 128 if (val > 0xFF) 129 val = 0xFF; 130 *pRes = (char)val; 131 return ct; 132 } 133 134 default: /* quoted character is result character */; 135 } 136 137 return res; 138 } 139 140 141 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 142 * 143 * A quoted string has been found. 144 * Find the end of it and compress any escape sequences. 145 */ 146 static bool 147 contiguous_quote(char ** pps, char * pq, int * lnct_p) 148 { 149 char * ps = *pps + 1; 150 151 for (;;) { 152 while (IS_WHITESPACE_CHAR(*ps)) 153 if (*(ps++) == NL) 154 (*lnct_p)++; 155 156 /* 157 * IF the next character is a quote character, 158 * THEN we will concatenate the strings. 159 */ 160 switch (*ps) { 161 case '"': 162 case '\'': 163 *pq = *(ps++); /* assign new quote character and return */ 164 *pps = ps; 165 return true; 166 167 case '/': 168 /* 169 * Allow for a comment embedded in the concatenated string. 170 */ 171 switch (ps[1]) { 172 default: 173 *pps = NULL; 174 return false; 175 176 case '/': 177 /* 178 * Skip to end of line 179 */ 180 ps = strchr(ps, NL); 181 if (ps == NULL) { 182 *pps = NULL; 183 return false; 184 } 185 break; 186 187 case '*': 188 { 189 char * p = strstr( ps+2, "*/" ); 190 /* 191 * Skip to terminating star slash 192 */ 193 if (p == NULL) { 194 *pps = NULL; 195 return false; 196 } 197 198 while (ps < p) { 199 if (*(ps++) == NL) 200 (*lnct_p)++; 201 } 202 203 ps = p + 2; 204 } 205 } 206 continue; 207 208 default: 209 /* 210 * The next non-whitespace character is not a quote. 211 * The series of quoted strings has come to an end. 212 */ 213 *pps = ps; 214 return false; 215 } 216 } 217 } 218 219 /*=export_func ao_string_cook 220 * private: 221 * 222 * what: concatenate and escape-process strings 223 * arg: + char * + pzScan + The *MODIFIABLE* input buffer + 224 * arg: + int * + lnct_p + The (possibly NULL) pointer to a line count + 225 * 226 * ret-type: char * 227 * ret-desc: The address of the text following the processed strings. 228 * The return value is NULL if the strings are ill-formed. 229 * 230 * doc: 231 * 232 * A series of one or more quoted strings are concatenated together. 233 * If they are quoted with double quotes (@code{"}), then backslash 234 * escapes are processed per the C programming language. If they are 235 * single quote strings, then the backslashes are honored only when they 236 * precede another backslash or a single quote character. 237 * 238 * err: @code{NULL} is returned if the string(s) is/are mal-formed. 239 =*/ 240 char * 241 ao_string_cook(char * pzScan, int * lnct_p) 242 { 243 int l = 0; 244 char q = *pzScan; 245 246 /* 247 * It is a quoted string. Process the escape sequence characters 248 * (in the set "abfnrtv") and make sure we find a closing quote. 249 */ 250 char * pzD = pzScan++; 251 char * pzS = pzScan; 252 253 if (lnct_p == NULL) 254 lnct_p = &l; 255 256 for (;;) { 257 /* 258 * IF the next character is the quote character, THEN we may end the 259 * string. We end it unless the next non-blank character *after* the 260 * string happens to also be a quote. If it is, then we will change 261 * our quote character to the new quote character and continue 262 * condensing text. 263 */ 264 while (*pzS == q) { 265 *pzD = NUL; /* This is probably the end of the line */ 266 if (! contiguous_quote(&pzS, &q, lnct_p)) 267 return pzS; 268 } 269 270 /* 271 * We are inside a quoted string. Copy text. 272 */ 273 switch (*(pzD++) = *(pzS++)) { 274 case NUL: 275 return NULL; 276 277 case NL: 278 (*lnct_p)++; 279 break; 280 281 case '\\': 282 /* 283 * IF we are escaping a new line, 284 * THEN drop both the escape and the newline from 285 * the result string. 286 */ 287 if (*pzS == NL) { 288 pzS++; 289 pzD--; 290 (*lnct_p)++; 291 } 292 293 /* 294 * ELSE IF the quote character is '"' or '`', 295 * THEN we do the full escape character processing 296 */ 297 else if (q != '\'') { 298 unsigned int ct; 299 ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL); 300 if (ct == 0) 301 return NULL; 302 303 pzS += ct; 304 } /* if (q != '\'') */ 305 306 /* 307 * OTHERWISE, we only process "\\", "\'" and "\#" sequences. 308 * The latter only to easily hide preprocessing directives. 309 */ 310 else switch (*pzS) { 311 case '\\': 312 case '\'': 313 case '#': 314 pzD[-1] = *pzS++; 315 } 316 } /* switch (*(pzD++) = *(pzS++)) */ 317 } /* for (;;) */ 318 } 319 320 /** @} 321 * 322 * Local Variables: 323 * mode: C 324 * c-file-style: "stroustrup" 325 * indent-tabs-mode: nil 326 * End: 327 * end of autoopts/cook.c */ 328