12b15cb3dSCy Schubert /**
22b15cb3dSCy Schubert * \file cook.c
3ea906c41SOllivier Robert *
4ea906c41SOllivier Robert * This file contains the routines that deal with processing quoted strings
5ea906c41SOllivier Robert * into an internal format.
62b15cb3dSCy Schubert *
72b15cb3dSCy Schubert * @addtogroup autoopts
82b15cb3dSCy Schubert * @{
9ea906c41SOllivier Robert */
10ea906c41SOllivier Robert /*
112b15cb3dSCy Schubert * This file is part of AutoOpts, a companion to AutoGen.
122b15cb3dSCy Schubert * AutoOpts is free software.
13*a466cc55SCy Schubert * AutoOpts is Copyright (C) 1992-2018 by Bruce Korb - all rights reserved
14ea906c41SOllivier Robert *
152b15cb3dSCy Schubert * AutoOpts is available under any one of two licenses. The license
162b15cb3dSCy Schubert * in use must be one of these two and the choice is under the control
172b15cb3dSCy Schubert * of the user of the license.
18ea906c41SOllivier Robert *
192b15cb3dSCy Schubert * The GNU Lesser General Public License, version 3 or later
202b15cb3dSCy Schubert * See the files "COPYING.lgplv3" and "COPYING.gplv3"
21ea906c41SOllivier Robert *
222b15cb3dSCy Schubert * The Modified Berkeley Software Distribution License
232b15cb3dSCy Schubert * See the file "COPYING.mbsd"
24ea906c41SOllivier Robert *
252b15cb3dSCy Schubert * These files have the following sha256 sums:
26ea906c41SOllivier Robert *
272b15cb3dSCy Schubert * 8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95 COPYING.gplv3
282b15cb3dSCy Schubert * 4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b COPYING.lgplv3
292b15cb3dSCy Schubert * 13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239 COPYING.mbsd
30ea906c41SOllivier Robert */
31ea906c41SOllivier Robert
32ea906c41SOllivier Robert /*=export_func ao_string_cook_escape_char
33ea906c41SOllivier Robert * private:
34ea906c41SOllivier Robert *
35ea906c41SOllivier Robert * what: escape-process a string fragment
36ea906c41SOllivier Robert * arg: + char const * + pzScan + points to character after the escape +
37ea906c41SOllivier Robert * arg: + char * + pRes + Where to put the result byte +
38ea906c41SOllivier Robert * arg: + unsigned int + nl_ch + replacement char if scanned char is \n +
39ea906c41SOllivier Robert *
40ea906c41SOllivier Robert * ret-type: unsigned int
41ea906c41SOllivier Robert * ret-desc: The number of bytes consumed processing the escaped character.
42ea906c41SOllivier Robert *
43ea906c41SOllivier Robert * doc:
44ea906c41SOllivier Robert *
45ea906c41SOllivier Robert * This function converts "t" into "\t" and all your other favorite
 *  escapes, including numeric ones:  hex and octal, too.
47ea906c41SOllivier Robert * The returned result tells the caller how far to advance the
48ea906c41SOllivier Robert * scan pointer (passed in). The default is to just pass through the
49ea906c41SOllivier Robert * escaped character and advance the scan by one.
50ea906c41SOllivier Robert *
51ea906c41SOllivier Robert * Some applications need to keep an escaped newline, others need to
52ea906c41SOllivier Robert * suppress it. This is accomplished by supplying a '\n' replacement
53ea906c41SOllivier Robert * character that is different from \n, if need be. For example, use
54ea906c41SOllivier Robert * 0x7F and never emit a 0x7F.
55ea906c41SOllivier Robert *
 * err:  zero is returned if the input string ends (NUL) right after the escape.
57ea906c41SOllivier Robert =*/
58ea906c41SOllivier Robert unsigned int
ao_string_cook_escape_char(char const * pzIn,char * pRes,uint_t nl)592b15cb3dSCy Schubert ao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl)
60ea906c41SOllivier Robert {
61ea906c41SOllivier Robert unsigned int res = 1;
62ea906c41SOllivier Robert
63ea906c41SOllivier Robert switch (*pRes = *pzIn++) {
64ea906c41SOllivier Robert case NUL: /* NUL - end of input string */
65ea906c41SOllivier Robert return 0;
66ea906c41SOllivier Robert case '\r':
672b15cb3dSCy Schubert if (*pzIn != NL)
68ea906c41SOllivier Robert return 1;
69ea906c41SOllivier Robert res++;
70ea906c41SOllivier Robert /* FALLTHROUGH */
712b15cb3dSCy Schubert case NL: /* NL - emit newline */
72ea906c41SOllivier Robert *pRes = (char)nl;
73ea906c41SOllivier Robert return res;
74ea906c41SOllivier Robert
75ea906c41SOllivier Robert case 'a': *pRes = '\a'; break;
76ea906c41SOllivier Robert case 'b': *pRes = '\b'; break;
77ea906c41SOllivier Robert case 'f': *pRes = '\f'; break;
782b15cb3dSCy Schubert case 'n': *pRes = NL; break;
79ea906c41SOllivier Robert case 'r': *pRes = '\r'; break;
80ea906c41SOllivier Robert case 't': *pRes = '\t'; break;
81ea906c41SOllivier Robert case 'v': *pRes = '\v'; break;
82ea906c41SOllivier Robert
832b15cb3dSCy Schubert case 'x':
842b15cb3dSCy Schubert case 'X': /* HEX Escape */
852b15cb3dSCy Schubert if (IS_HEX_DIGIT_CHAR(*pzIn)) {
862b15cb3dSCy Schubert char z[4];
872b15cb3dSCy Schubert unsigned int ct = 0;
88ea906c41SOllivier Robert
892b15cb3dSCy Schubert do {
902b15cb3dSCy Schubert z[ct] = pzIn[ct];
912b15cb3dSCy Schubert if (++ct >= 2)
92ea906c41SOllivier Robert break;
932b15cb3dSCy Schubert } while (IS_HEX_DIGIT_CHAR(pzIn[ct]));
942b15cb3dSCy Schubert z[ct] = NUL;
952b15cb3dSCy Schubert *pRes = (char)strtoul(z, NULL, 16);
962b15cb3dSCy Schubert return ct + 1;
97ea906c41SOllivier Robert }
98ea906c41SOllivier Robert break;
99ea906c41SOllivier Robert
1002b15cb3dSCy Schubert case '0': case '1': case '2': case '3':
1012b15cb3dSCy Schubert case '4': case '5': case '6': case '7':
1022b15cb3dSCy Schubert {
103ea906c41SOllivier Robert /*
104ea906c41SOllivier Robert * IF the character copied was an octal digit,
1052b15cb3dSCy Schubert * THEN set the output character to an octal value.
1062b15cb3dSCy Schubert * The 3 octal digit result might exceed 0xFF, so check it.
107ea906c41SOllivier Robert */
1082b15cb3dSCy Schubert char z[4];
1092b15cb3dSCy Schubert unsigned long val;
1102b15cb3dSCy Schubert unsigned int ct = 0;
111ea906c41SOllivier Robert
1122b15cb3dSCy Schubert z[ct++] = *--pzIn;
1132b15cb3dSCy Schubert while (IS_OCT_DIGIT_CHAR(pzIn[ct])) {
1142b15cb3dSCy Schubert z[ct] = pzIn[ct];
1152b15cb3dSCy Schubert if (++ct >= 3)
116ea906c41SOllivier Robert break;
117ea906c41SOllivier Robert }
118ea906c41SOllivier Robert
1192b15cb3dSCy Schubert z[ct] = NUL;
1202b15cb3dSCy Schubert val = strtoul(z, NULL, 8);
1212b15cb3dSCy Schubert if (val > 0xFF)
1222b15cb3dSCy Schubert val = 0xFF;
1232b15cb3dSCy Schubert *pRes = (char)val;
1242b15cb3dSCy Schubert return ct;
125ea906c41SOllivier Robert }
126ea906c41SOllivier Robert
1272b15cb3dSCy Schubert default: /* quoted character is result character */;
128ea906c41SOllivier Robert }
129ea906c41SOllivier Robert
130ea906c41SOllivier Robert return res;
131ea906c41SOllivier Robert }
132ea906c41SOllivier Robert
133*a466cc55SCy Schubert /**
134*a466cc55SCy Schubert * count newlines between start and end
135*a466cc55SCy Schubert */
136*a466cc55SCy Schubert static char *
nl_count(char * start,char * end,int * lnct_p)137*a466cc55SCy Schubert nl_count(char * start, char * end, int * lnct_p)
138*a466cc55SCy Schubert {
139*a466cc55SCy Schubert while (start < end) {
140*a466cc55SCy Schubert if (*(start++) == NL)
141*a466cc55SCy Schubert (*lnct_p)++;
142*a466cc55SCy Schubert }
143*a466cc55SCy Schubert return end;
144*a466cc55SCy Schubert }
145ea906c41SOllivier Robert
146ea906c41SOllivier Robert /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
147ea906c41SOllivier Robert *
148ea906c41SOllivier Robert * A quoted string has been found.
149ea906c41SOllivier Robert * Find the end of it and compress any escape sequences.
150ea906c41SOllivier Robert */
1512b15cb3dSCy Schubert static bool
contiguous_quote(char ** pps,char * pq,int * lnct_p)1522b15cb3dSCy Schubert contiguous_quote(char ** pps, char * pq, int * lnct_p)
1532b15cb3dSCy Schubert {
1542b15cb3dSCy Schubert char * ps = *pps + 1;
1552b15cb3dSCy Schubert
1562b15cb3dSCy Schubert for (;;) {
1572b15cb3dSCy Schubert while (IS_WHITESPACE_CHAR(*ps))
1582b15cb3dSCy Schubert if (*(ps++) == NL)
1592b15cb3dSCy Schubert (*lnct_p)++;
1602b15cb3dSCy Schubert
1612b15cb3dSCy Schubert /*
1622b15cb3dSCy Schubert * IF the next character is a quote character,
1632b15cb3dSCy Schubert * THEN we will concatenate the strings.
1642b15cb3dSCy Schubert */
1652b15cb3dSCy Schubert switch (*ps) {
1662b15cb3dSCy Schubert case '"':
1672b15cb3dSCy Schubert case '\'':
1682b15cb3dSCy Schubert *pq = *(ps++); /* assign new quote character and return */
1692b15cb3dSCy Schubert *pps = ps;
1702b15cb3dSCy Schubert return true;
1712b15cb3dSCy Schubert
1722b15cb3dSCy Schubert case '/':
1732b15cb3dSCy Schubert /*
1742b15cb3dSCy Schubert * Allow for a comment embedded in the concatenated string.
1752b15cb3dSCy Schubert */
1762b15cb3dSCy Schubert switch (ps[1]) {
1772b15cb3dSCy Schubert default:
178*a466cc55SCy Schubert goto fail_return;
1792b15cb3dSCy Schubert
1802b15cb3dSCy Schubert case '/':
1812b15cb3dSCy Schubert /*
1822b15cb3dSCy Schubert * Skip to end of line
1832b15cb3dSCy Schubert */
1842b15cb3dSCy Schubert ps = strchr(ps, NL);
185*a466cc55SCy Schubert if (ps == NULL)
186*a466cc55SCy Schubert goto fail_return;
1872b15cb3dSCy Schubert break;
1882b15cb3dSCy Schubert
1892b15cb3dSCy Schubert case '*':
190*a466cc55SCy Schubert ps = nl_count(ps + 2, strstr(ps + 2, "*/"), lnct_p);
191*a466cc55SCy Schubert if (ps == NULL)
192*a466cc55SCy Schubert goto fail_return;
193*a466cc55SCy Schubert ps += 2;
1942b15cb3dSCy Schubert }
1952b15cb3dSCy Schubert continue;
1962b15cb3dSCy Schubert
1972b15cb3dSCy Schubert default:
1982b15cb3dSCy Schubert /*
1992b15cb3dSCy Schubert * The next non-whitespace character is not a quote.
2002b15cb3dSCy Schubert * The series of quoted strings has come to an end.
2012b15cb3dSCy Schubert */
2022b15cb3dSCy Schubert *pps = ps;
2032b15cb3dSCy Schubert return false;
2042b15cb3dSCy Schubert }
2052b15cb3dSCy Schubert }
206*a466cc55SCy Schubert
207*a466cc55SCy Schubert fail_return:
208*a466cc55SCy Schubert *pps = NULL;
209*a466cc55SCy Schubert return false;
2102b15cb3dSCy Schubert }
2112b15cb3dSCy Schubert
212ea906c41SOllivier Robert /*=export_func ao_string_cook
213ea906c41SOllivier Robert * private:
214ea906c41SOllivier Robert *
215ea906c41SOllivier Robert * what: concatenate and escape-process strings
216ea906c41SOllivier Robert * arg: + char * + pzScan + The *MODIFIABLE* input buffer +
2172b15cb3dSCy Schubert * arg: + int * + lnct_p + The (possibly NULL) pointer to a line count +
218ea906c41SOllivier Robert *
219ea906c41SOllivier Robert * ret-type: char *
220ea906c41SOllivier Robert * ret-desc: The address of the text following the processed strings.
221ea906c41SOllivier Robert * The return value is NULL if the strings are ill-formed.
222ea906c41SOllivier Robert *
223ea906c41SOllivier Robert * doc:
224ea906c41SOllivier Robert *
225ea906c41SOllivier Robert * A series of one or more quoted strings are concatenated together.
226ea906c41SOllivier Robert * If they are quoted with double quotes (@code{"}), then backslash
227ea906c41SOllivier Robert * escapes are processed per the C programming language. If they are
228ea906c41SOllivier Robert * single quote strings, then the backslashes are honored only when they
229ea906c41SOllivier Robert * precede another backslash or a single quote character.
230ea906c41SOllivier Robert *
231ea906c41SOllivier Robert * err: @code{NULL} is returned if the string(s) is/are mal-formed.
232ea906c41SOllivier Robert =*/
233ea906c41SOllivier Robert char *
ao_string_cook(char * pzScan,int * lnct_p)2342b15cb3dSCy Schubert ao_string_cook(char * pzScan, int * lnct_p)
235ea906c41SOllivier Robert {
236ea906c41SOllivier Robert int l = 0;
237ea906c41SOllivier Robert char q = *pzScan;
238ea906c41SOllivier Robert
239ea906c41SOllivier Robert /*
240ea906c41SOllivier Robert * It is a quoted string. Process the escape sequence characters
241ea906c41SOllivier Robert * (in the set "abfnrtv") and make sure we find a closing quote.
242ea906c41SOllivier Robert */
243ea906c41SOllivier Robert char * pzD = pzScan++;
244ea906c41SOllivier Robert char * pzS = pzScan;
245ea906c41SOllivier Robert
2462b15cb3dSCy Schubert if (lnct_p == NULL)
2472b15cb3dSCy Schubert lnct_p = &l;
248ea906c41SOllivier Robert
249ea906c41SOllivier Robert for (;;) {
250ea906c41SOllivier Robert /*
251ea906c41SOllivier Robert * IF the next character is the quote character, THEN we may end the
252ea906c41SOllivier Robert * string. We end it unless the next non-blank character *after* the
253ea906c41SOllivier Robert * string happens to also be a quote. If it is, then we will change
254ea906c41SOllivier Robert * our quote character to the new quote character and continue
255ea906c41SOllivier Robert * condensing text.
256ea906c41SOllivier Robert */
257ea906c41SOllivier Robert while (*pzS == q) {
258ea906c41SOllivier Robert *pzD = NUL; /* This is probably the end of the line */
2592b15cb3dSCy Schubert if (! contiguous_quote(&pzS, &q, lnct_p))
260ea906c41SOllivier Robert return pzS;
261ea906c41SOllivier Robert }
262ea906c41SOllivier Robert
263ea906c41SOllivier Robert /*
264ea906c41SOllivier Robert * We are inside a quoted string. Copy text.
265ea906c41SOllivier Robert */
266ea906c41SOllivier Robert switch (*(pzD++) = *(pzS++)) {
267ea906c41SOllivier Robert case NUL:
268ea906c41SOllivier Robert return NULL;
269ea906c41SOllivier Robert
2702b15cb3dSCy Schubert case NL:
2712b15cb3dSCy Schubert (*lnct_p)++;
272ea906c41SOllivier Robert break;
273ea906c41SOllivier Robert
274ea906c41SOllivier Robert case '\\':
275ea906c41SOllivier Robert /*
276ea906c41SOllivier Robert * IF we are escaping a new line,
277ea906c41SOllivier Robert * THEN drop both the escape and the newline from
278ea906c41SOllivier Robert * the result string.
279ea906c41SOllivier Robert */
2802b15cb3dSCy Schubert if (*pzS == NL) {
281ea906c41SOllivier Robert pzS++;
282ea906c41SOllivier Robert pzD--;
2832b15cb3dSCy Schubert (*lnct_p)++;
284ea906c41SOllivier Robert }
285ea906c41SOllivier Robert
286ea906c41SOllivier Robert /*
287ea906c41SOllivier Robert * ELSE IF the quote character is '"' or '`',
288ea906c41SOllivier Robert * THEN we do the full escape character processing
289ea906c41SOllivier Robert */
290ea906c41SOllivier Robert else if (q != '\'') {
2912b15cb3dSCy Schubert unsigned int ct;
2922b15cb3dSCy Schubert ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL);
293ea906c41SOllivier Robert if (ct == 0)
294ea906c41SOllivier Robert return NULL;
295ea906c41SOllivier Robert
296ea906c41SOllivier Robert pzS += ct;
297ea906c41SOllivier Robert } /* if (q != '\'') */
298ea906c41SOllivier Robert
299ea906c41SOllivier Robert /*
300ea906c41SOllivier Robert * OTHERWISE, we only process "\\", "\'" and "\#" sequences.
301ea906c41SOllivier Robert * The latter only to easily hide preprocessing directives.
302ea906c41SOllivier Robert */
303ea906c41SOllivier Robert else switch (*pzS) {
304ea906c41SOllivier Robert case '\\':
305ea906c41SOllivier Robert case '\'':
306ea906c41SOllivier Robert case '#':
307ea906c41SOllivier Robert pzD[-1] = *pzS++;
308ea906c41SOllivier Robert }
309ea906c41SOllivier Robert } /* switch (*(pzD++) = *(pzS++)) */
310ea906c41SOllivier Robert } /* for (;;) */
311ea906c41SOllivier Robert }
3122b15cb3dSCy Schubert
3132b15cb3dSCy Schubert /** @}
3142b15cb3dSCy Schubert *
315ea906c41SOllivier Robert * Local Variables:
316ea906c41SOllivier Robert * mode: C
317ea906c41SOllivier Robert * c-file-style: "stroustrup"
318ea906c41SOllivier Robert * indent-tabs-mode: nil
319ea906c41SOllivier Robert * End:
320ea906c41SOllivier Robert * end of autoopts/cook.c */
321