1*eabc0478Schristos /* $NetBSD: cook.c,v 1.6 2024/08/18 20:47:24 christos Exp $ */ 2abb0f93cSkardel 3f003fb54Skardel /** 4f003fb54Skardel * \file cook.c 5f003fb54Skardel * 6abb0f93cSkardel * This file contains the routines that deal with processing quoted strings 7abb0f93cSkardel * into an internal format. 8abb0f93cSkardel * 98585484eSchristos * @addtogroup autoopts 108585484eSchristos * @{ 118585484eSchristos */ 128585484eSchristos /* 13abb0f93cSkardel * This file is part of AutoOpts, a companion to AutoGen. 14abb0f93cSkardel * AutoOpts is free software. 15*eabc0478Schristos * AutoOpts is Copyright (C) 1992-2018 by Bruce Korb - all rights reserved 16abb0f93cSkardel * 17abb0f93cSkardel * AutoOpts is available under any one of two licenses. The license 18abb0f93cSkardel * in use must be one of these two and the choice is under the control 19abb0f93cSkardel * of the user of the license. 20abb0f93cSkardel * 21abb0f93cSkardel * The GNU Lesser General Public License, version 3 or later 22abb0f93cSkardel * See the files "COPYING.lgplv3" and "COPYING.gplv3" 23abb0f93cSkardel * 24abb0f93cSkardel * The Modified Berkeley Software Distribution License 25abb0f93cSkardel * See the file "COPYING.mbsd" 26abb0f93cSkardel * 278585484eSchristos * These files have the following sha256 sums: 28abb0f93cSkardel * 298585484eSchristos * 8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95 COPYING.gplv3 308585484eSchristos * 4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b COPYING.lgplv3 318585484eSchristos * 13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239 COPYING.mbsd 32abb0f93cSkardel */ 33abb0f93cSkardel 34abb0f93cSkardel /*=export_func ao_string_cook_escape_char 35abb0f93cSkardel * private: 36abb0f93cSkardel * 37abb0f93cSkardel * what: escape-process a string fragment 38abb0f93cSkardel * arg: + char const * + pzScan + points to character after the escape + 39abb0f93cSkardel * arg: + char * + pRes + Where to put the result byte + 40abb0f93cSkardel * arg: + unsigned int + nl_ch + replacement char if scanned char is \n + 41abb0f93cSkardel * 42abb0f93cSkardel * ret-type: unsigned int 43abb0f93cSkardel * ret-desc: The number of bytes consumed processing the escaped character. 44abb0f93cSkardel * 45abb0f93cSkardel * doc: 46abb0f93cSkardel * 47abb0f93cSkardel * This function converts "t" into "\t" and all your other favorite 48abb0f93cSkardel * escapes, including numeric ones: hex and ocatal, too. 49abb0f93cSkardel * The returned result tells the caller how far to advance the 50abb0f93cSkardel * scan pointer (passed in). The default is to just pass through the 51abb0f93cSkardel * escaped character and advance the scan by one. 52abb0f93cSkardel * 53abb0f93cSkardel * Some applications need to keep an escaped newline, others need to 54abb0f93cSkardel * suppress it. This is accomplished by supplying a '\n' replacement 55abb0f93cSkardel * character that is different from \n, if need be. For example, use 56abb0f93cSkardel * 0x7F and never emit a 0x7F. 57abb0f93cSkardel * 58abb0f93cSkardel * err: @code{NULL} is returned if the string is mal-formed. 59abb0f93cSkardel =*/ 60abb0f93cSkardel unsigned int 618585484eSchristos ao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl) 62abb0f93cSkardel { 63abb0f93cSkardel unsigned int res = 1; 64abb0f93cSkardel 65abb0f93cSkardel switch (*pRes = *pzIn++) { 66abb0f93cSkardel case NUL: /* NUL - end of input string */ 67abb0f93cSkardel return 0; 68abb0f93cSkardel case '\r': 698585484eSchristos if (*pzIn != NL) 70abb0f93cSkardel return 1; 71abb0f93cSkardel res++; 72abb0f93cSkardel /* FALLTHROUGH */ 738585484eSchristos case NL: /* NL - emit newline */ 74abb0f93cSkardel *pRes = (char)nl; 75abb0f93cSkardel return res; 76abb0f93cSkardel 77abb0f93cSkardel case 'a': *pRes = '\a'; break; 78abb0f93cSkardel case 'b': *pRes = '\b'; break; 79abb0f93cSkardel case 'f': *pRes = '\f'; break; 808585484eSchristos case 'n': *pRes = NL; break; 81abb0f93cSkardel case 'r': *pRes = '\r'; break; 82abb0f93cSkardel case 't': *pRes = '\t'; break; 83abb0f93cSkardel case 'v': *pRes = '\v'; break; 84abb0f93cSkardel 85abb0f93cSkardel case 'x': 86abb0f93cSkardel case 'X': /* HEX Escape */ 87abb0f93cSkardel if (IS_HEX_DIGIT_CHAR(*pzIn)) { 888585484eSchristos char z[4]; 898585484eSchristos unsigned int ct = 0; 90abb0f93cSkardel 918585484eSchristos do { 928585484eSchristos z[ct] = pzIn[ct]; 938585484eSchristos if (++ct >= 2) 948585484eSchristos break; 958585484eSchristos } while (IS_HEX_DIGIT_CHAR(pzIn[ct])); 968585484eSchristos z[ct] = NUL; 978585484eSchristos *pRes = (char)strtoul(z, NULL, 16); 988585484eSchristos return ct + 1; 99abb0f93cSkardel } 100abb0f93cSkardel break; 101abb0f93cSkardel 102abb0f93cSkardel case '0': case '1': case '2': case '3': 103abb0f93cSkardel case '4': case '5': case '6': case '7': 104abb0f93cSkardel { 105abb0f93cSkardel /* 106abb0f93cSkardel * IF the character copied was an octal digit, 1078585484eSchristos * THEN set the output character to an octal value. 1088585484eSchristos * The 3 octal digit result might exceed 0xFF, so check it. 109abb0f93cSkardel */ 1108585484eSchristos char z[4]; 111abb0f93cSkardel unsigned long val; 1128585484eSchristos unsigned int ct = 0; 113abb0f93cSkardel 1148585484eSchristos z[ct++] = *--pzIn; 1158585484eSchristos while (IS_OCT_DIGIT_CHAR(pzIn[ct])) { 1168585484eSchristos z[ct] = pzIn[ct]; 1178585484eSchristos if (++ct >= 3) 118abb0f93cSkardel break; 119abb0f93cSkardel } 120abb0f93cSkardel 1218585484eSchristos z[ct] = NUL; 1228585484eSchristos val = strtoul(z, NULL, 8); 1238585484eSchristos if (val > 0xFF) 1248585484eSchristos val = 0xFF; 1258585484eSchristos *pRes = (char)val; 1268585484eSchristos return ct; 1278585484eSchristos } 1288585484eSchristos 1298585484eSchristos default: /* quoted character is result character */; 130abb0f93cSkardel } 131abb0f93cSkardel 132abb0f93cSkardel return res; 133abb0f93cSkardel } 134abb0f93cSkardel 135*eabc0478Schristos /** 136*eabc0478Schristos * count newlines between start and end 137*eabc0478Schristos */ 138*eabc0478Schristos static char * 139*eabc0478Schristos nl_count(char * start, char * end, int * lnct_p) 140*eabc0478Schristos { 141*eabc0478Schristos while (start < end) { 142*eabc0478Schristos if (*(start++) == NL) 143*eabc0478Schristos (*lnct_p)++; 144*eabc0478Schristos } 145*eabc0478Schristos return end; 146*eabc0478Schristos } 147abb0f93cSkardel 148abb0f93cSkardel /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 149abb0f93cSkardel * 150abb0f93cSkardel * A quoted string has been found. 151abb0f93cSkardel * Find the end of it and compress any escape sequences. 152abb0f93cSkardel */ 1538585484eSchristos static bool 154f003fb54Skardel contiguous_quote(char ** pps, char * pq, int * lnct_p) 155f003fb54Skardel { 156f003fb54Skardel char * ps = *pps + 1; 157f003fb54Skardel 158f003fb54Skardel for (;;) { 159f003fb54Skardel while (IS_WHITESPACE_CHAR(*ps)) 1608585484eSchristos if (*(ps++) == NL) 161f003fb54Skardel (*lnct_p)++; 162f003fb54Skardel 163f003fb54Skardel /* 164f003fb54Skardel * IF the next character is a quote character, 165f003fb54Skardel * THEN we will concatenate the strings. 166f003fb54Skardel */ 167f003fb54Skardel switch (*ps) { 168f003fb54Skardel case '"': 169f003fb54Skardel case '\'': 170f003fb54Skardel *pq = *(ps++); /* assign new quote character and return */ 171f003fb54Skardel *pps = ps; 1728585484eSchristos return true; 173f003fb54Skardel 174f003fb54Skardel case '/': 175f003fb54Skardel /* 176f003fb54Skardel * Allow for a comment embedded in the concatenated string. 177f003fb54Skardel */ 178f003fb54Skardel switch (ps[1]) { 179f003fb54Skardel default: 180*eabc0478Schristos goto fail_return; 181f003fb54Skardel 182f003fb54Skardel case '/': 183f003fb54Skardel /* 184f003fb54Skardel * Skip to end of line 185f003fb54Skardel */ 1868585484eSchristos ps = strchr(ps, NL); 187*eabc0478Schristos if (ps == NULL) 188*eabc0478Schristos goto fail_return; 189f003fb54Skardel break; 190f003fb54Skardel 191f003fb54Skardel case '*': 192*eabc0478Schristos ps = nl_count(ps + 2, strstr(ps + 2, "*/"), lnct_p); 193*eabc0478Schristos if (ps == NULL) 194*eabc0478Schristos goto fail_return; 195*eabc0478Schristos ps += 2; 196f003fb54Skardel } 197f003fb54Skardel continue; 198f003fb54Skardel 199f003fb54Skardel default: 200f003fb54Skardel /* 201f003fb54Skardel * The next non-whitespace character is not a quote. 202f003fb54Skardel * The series of quoted strings has come to an end. 203f003fb54Skardel */ 204f003fb54Skardel *pps = ps; 2058585484eSchristos return false; 206f003fb54Skardel } 207f003fb54Skardel } 208*eabc0478Schristos 209*eabc0478Schristos fail_return: 210*eabc0478Schristos *pps = NULL; 211*eabc0478Schristos return false; 212f003fb54Skardel } 213f003fb54Skardel 214abb0f93cSkardel /*=export_func ao_string_cook 215abb0f93cSkardel * private: 216abb0f93cSkardel * 217abb0f93cSkardel * what: concatenate and escape-process strings 218abb0f93cSkardel * arg: + char * + pzScan + The *MODIFIABLE* input buffer + 219f003fb54Skardel * arg: + int * + lnct_p + The (possibly NULL) pointer to a line count + 220abb0f93cSkardel * 221abb0f93cSkardel * ret-type: char * 222abb0f93cSkardel * ret-desc: The address of the text following the processed strings. 223abb0f93cSkardel * The return value is NULL if the strings are ill-formed. 224abb0f93cSkardel * 225abb0f93cSkardel * doc: 226abb0f93cSkardel * 227abb0f93cSkardel * A series of one or more quoted strings are concatenated together. 228abb0f93cSkardel * If they are quoted with double quotes (@code{"}), then backslash 229abb0f93cSkardel * escapes are processed per the C programming language. If they are 230abb0f93cSkardel * single quote strings, then the backslashes are honored only when they 231abb0f93cSkardel * precede another backslash or a single quote character. 232abb0f93cSkardel * 233abb0f93cSkardel * err: @code{NULL} is returned if the string(s) is/are mal-formed. 234abb0f93cSkardel =*/ 235abb0f93cSkardel char * 236f003fb54Skardel ao_string_cook(char * pzScan, int * lnct_p) 237abb0f93cSkardel { 238abb0f93cSkardel int l = 0; 239abb0f93cSkardel char q = *pzScan; 240abb0f93cSkardel 241abb0f93cSkardel /* 242abb0f93cSkardel * It is a quoted string. Process the escape sequence characters 243abb0f93cSkardel * (in the set "abfnrtv") and make sure we find a closing quote. 244abb0f93cSkardel */ 245abb0f93cSkardel char * pzD = pzScan++; 246abb0f93cSkardel char * pzS = pzScan; 247abb0f93cSkardel 248f003fb54Skardel if (lnct_p == NULL) 249f003fb54Skardel lnct_p = &l; 250abb0f93cSkardel 251abb0f93cSkardel for (;;) { 252abb0f93cSkardel /* 253abb0f93cSkardel * IF the next character is the quote character, THEN we may end the 254abb0f93cSkardel * string. We end it unless the next non-blank character *after* the 255abb0f93cSkardel * string happens to also be a quote. If it is, then we will change 256abb0f93cSkardel * our quote character to the new quote character and continue 257abb0f93cSkardel * condensing text. 258abb0f93cSkardel */ 259abb0f93cSkardel while (*pzS == q) { 260abb0f93cSkardel *pzD = NUL; /* This is probably the end of the line */ 261f003fb54Skardel if (! contiguous_quote(&pzS, &q, lnct_p)) 262abb0f93cSkardel return pzS; 263abb0f93cSkardel } 264abb0f93cSkardel 265abb0f93cSkardel /* 266abb0f93cSkardel * We are inside a quoted string. Copy text. 267abb0f93cSkardel */ 268abb0f93cSkardel switch (*(pzD++) = *(pzS++)) { 269abb0f93cSkardel case NUL: 270abb0f93cSkardel return NULL; 271abb0f93cSkardel 2728585484eSchristos case NL: 273f003fb54Skardel (*lnct_p)++; 274abb0f93cSkardel break; 275abb0f93cSkardel 276abb0f93cSkardel case '\\': 277abb0f93cSkardel /* 278abb0f93cSkardel * IF we are escaping a new line, 279abb0f93cSkardel * THEN drop both the escape and the newline from 280abb0f93cSkardel * the result string. 281abb0f93cSkardel */ 2828585484eSchristos if (*pzS == NL) { 283abb0f93cSkardel pzS++; 284abb0f93cSkardel pzD--; 285f003fb54Skardel (*lnct_p)++; 286abb0f93cSkardel } 287abb0f93cSkardel 288abb0f93cSkardel /* 289abb0f93cSkardel * ELSE IF the quote character is '"' or '`', 290abb0f93cSkardel * THEN we do the full escape character processing 291abb0f93cSkardel */ 292abb0f93cSkardel else if (q != '\'') { 2938585484eSchristos unsigned int ct; 2948585484eSchristos ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL); 295abb0f93cSkardel if (ct == 0) 296abb0f93cSkardel return NULL; 297abb0f93cSkardel 298abb0f93cSkardel pzS += ct; 299abb0f93cSkardel } /* if (q != '\'') */ 300abb0f93cSkardel 301abb0f93cSkardel /* 302abb0f93cSkardel * OTHERWISE, we only process "\\", "\'" and "\#" sequences. 303abb0f93cSkardel * The latter only to easily hide preprocessing directives. 304abb0f93cSkardel */ 305abb0f93cSkardel else switch (*pzS) { 306abb0f93cSkardel case '\\': 307abb0f93cSkardel case '\'': 308abb0f93cSkardel case '#': 309abb0f93cSkardel pzD[-1] = *pzS++; 310abb0f93cSkardel } 311abb0f93cSkardel } /* switch (*(pzD++) = *(pzS++)) */ 312abb0f93cSkardel } /* for (;;) */ 313abb0f93cSkardel } 3148585484eSchristos 3158585484eSchristos /** @} 3168585484eSchristos * 317abb0f93cSkardel * Local Variables: 318abb0f93cSkardel * mode: C 319abb0f93cSkardel * c-file-style: "stroustrup" 320abb0f93cSkardel * indent-tabs-mode: nil 321abb0f93cSkardel * End: 322abb0f93cSkardel * end of autoopts/cook.c */ 323