xref: /netbsd-src/lib/libc/stdio/fparseln.c (revision 550147bd6a5bef946e7a7f153e49c0d11314e598)
1*550147bdSsnj /*	$NetBSD: fparseln.c,v 1.10 2009/10/21 01:07:45 snj Exp $	*/
2b1e79510Slukem 
3b1e79510Slukem /*
4b1e79510Slukem  * Copyright (c) 1997 Christos Zoulas.  All rights reserved.
5b1e79510Slukem  *
6b1e79510Slukem  * Redistribution and use in source and binary forms, with or without
7b1e79510Slukem  * modification, are permitted provided that the following conditions
8b1e79510Slukem  * are met:
9b1e79510Slukem  * 1. Redistributions of source code must retain the above copyright
10b1e79510Slukem  *    notice, this list of conditions and the following disclaimer.
11b1e79510Slukem  * 2. Redistributions in binary form must reproduce the above copyright
12b1e79510Slukem  *    notice, this list of conditions and the following disclaimer in the
13b1e79510Slukem  *    documentation and/or other materials provided with the distribution.
14b1e79510Slukem  *
15b1e79510Slukem  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16b1e79510Slukem  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17b1e79510Slukem  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18b1e79510Slukem  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19b1e79510Slukem  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20b1e79510Slukem  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21b1e79510Slukem  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22b1e79510Slukem  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23b1e79510Slukem  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24b1e79510Slukem  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25b1e79510Slukem  */
26b1e79510Slukem 
27b1e79510Slukem #include <sys/cdefs.h>
28b1e79510Slukem #if defined(LIBC_SCCS) && !defined(lint)
29*550147bdSsnj __RCSID("$NetBSD: fparseln.c,v 1.10 2009/10/21 01:07:45 snj Exp $");
30b1e79510Slukem #endif /* LIBC_SCCS and not lint */
31b1e79510Slukem 
32b1e79510Slukem #include "namespace.h"
33b1e79510Slukem 
34b1e79510Slukem #include <assert.h>
35b1e79510Slukem #include <errno.h>
36b1e79510Slukem #include <stdio.h>
37b1e79510Slukem #include <string.h>
38b1e79510Slukem #include <stdlib.h>
39b1e79510Slukem 
40b1e79510Slukem #ifdef __weak_alias
41b1e79510Slukem __weak_alias(fparseln,_fparseln)
42b1e79510Slukem #endif
43b1e79510Slukem 
442af58f1cStnozaki #if ! HAVE_FPARSELN || BROKEN_FPARSELN
45b1e79510Slukem 
46f56a8e58Sdrochner #ifndef HAVE_NBTOOL_CONFIG_H
4764b5a4f2Sdrochner #include "reentrant.h"
4864b5a4f2Sdrochner #include "local.h"
49f56a8e58Sdrochner #else
50f56a8e58Sdrochner #define FLOCKFILE(fp)
51f56a8e58Sdrochner #define FUNLOCKFILE(fp)
52f56a8e58Sdrochner #endif
5364b5a4f2Sdrochner 
54b2f78261Sjmc #if defined(_REENTRANT) && !HAVE_NBTOOL_CONFIG_H
5564b5a4f2Sdrochner #define __fgetln(f, l) __fgetstr(f, l, '\n')
5664b5a4f2Sdrochner #else
5764b5a4f2Sdrochner #define __fgetln(f, l) fgetln(f, l)
5864b5a4f2Sdrochner #endif
5964b5a4f2Sdrochner 
60b1e79510Slukem static int isescaped(const char *, const char *, int);
61b1e79510Slukem 
/* isescaped():
 *	Return true if the character at p, which belongs to the string
 *	that starts at sp, is escaped by the escape character esc.
 *
 *	A character counts as escaped when it is immediately preceded by
 *	an odd number of consecutive esc characters; with an even number
 *	the escape characters pair up and escape each other instead.
 *	Always returns 0 when esc is '\0' (no escape character in use).
 */
static int
isescaped(const char *sp, const char *p, int esc)
{
	const char     *cp;
	size_t		ne;

	_DIAGASSERT(sp != NULL);
	_DIAGASSERT(p != NULL);

	/* No escape character */
	if (esc == '\0')
		return 0;

	/*
	 * Count the number of escape characters that precede ours.
	 * Check cp > sp before looking at cp[-1] so that cp is never
	 * moved below sp: merely forming a pointer that points before
	 * the start of the string is undefined behaviour, even if it is
	 * only compared and never dereferenced.
	 */
	for (ne = 0, cp = p; cp > sp && cp[-1] == esc; cp--)
		ne++;

	/* Return true if odd number of escape characters */
	return (ne & 1) != 0;
}
86b1e79510Slukem 
87b1e79510Slukem 
88b1e79510Slukem /* fparseln():
89b1e79510Slukem  *	Read a line from a file parsing continuations ending in \
90b1e79510Slukem  *	and eliminating trailing newlines, or comments starting with
91b1e79510Slukem  *	the comment char.
92b1e79510Slukem  */
93b1e79510Slukem char *
fparseln(FILE * fp,size_t * size,size_t * lineno,const char str[3],int flags)94b1e79510Slukem fparseln(FILE *fp, size_t *size, size_t *lineno, const char str[3], int flags)
95b1e79510Slukem {
96b1e79510Slukem 	static const char dstr[3] = { '\\', '\\', '#' };
97b1e79510Slukem 
98b1e79510Slukem 	size_t	s, len;
99b1e79510Slukem 	char   *buf;
100b1e79510Slukem 	char   *ptr, *cp;
101b1e79510Slukem 	int	cnt;
102b1e79510Slukem 	char	esc, con, nl, com;
103b1e79510Slukem 
104b1e79510Slukem 	_DIAGASSERT(fp != NULL);
105b1e79510Slukem 
106b1e79510Slukem 	len = 0;
107b1e79510Slukem 	buf = NULL;
108b1e79510Slukem 	cnt = 1;
109b1e79510Slukem 
110b1e79510Slukem 	if (str == NULL)
111b1e79510Slukem 		str = dstr;
112b1e79510Slukem 
113b1e79510Slukem 	esc = str[0];
114b1e79510Slukem 	con = str[1];
115b1e79510Slukem 	com = str[2];
116b1e79510Slukem 	/*
117b1e79510Slukem 	 * XXX: it would be cool to be able to specify the newline character,
118b1e79510Slukem 	 * but unfortunately, fgetln does not let us
119b1e79510Slukem 	 */
120b1e79510Slukem 	nl  = '\n';
121b1e79510Slukem 
12264b5a4f2Sdrochner 	FLOCKFILE(fp);
12364b5a4f2Sdrochner 
124b1e79510Slukem 	while (cnt) {
125b1e79510Slukem 		cnt = 0;
126b1e79510Slukem 
127b1e79510Slukem 		if (lineno)
128b1e79510Slukem 			(*lineno)++;
129b1e79510Slukem 
13064b5a4f2Sdrochner 		if ((ptr = __fgetln(fp, &s)) == NULL)
131b1e79510Slukem 			break;
132b1e79510Slukem 
133b1e79510Slukem 		if (s && com) {		/* Check and eliminate comments */
134b1e79510Slukem 			for (cp = ptr; cp < ptr + s; cp++)
135b1e79510Slukem 				if (*cp == com && !isescaped(ptr, cp, esc)) {
136b1e79510Slukem 					s = cp - ptr;
137b1e79510Slukem 					cnt = s == 0 && buf == NULL;
138b1e79510Slukem 					break;
139b1e79510Slukem 				}
140b1e79510Slukem 		}
141b1e79510Slukem 
142b1e79510Slukem 		if (s && nl) { 		/* Check and eliminate newlines */
143b1e79510Slukem 			cp = &ptr[s - 1];
144b1e79510Slukem 
145b1e79510Slukem 			if (*cp == nl)
146b1e79510Slukem 				s--;	/* forget newline */
147b1e79510Slukem 		}
148b1e79510Slukem 
149b1e79510Slukem 		if (s && con) {		/* Check and eliminate continuations */
150b1e79510Slukem 			cp = &ptr[s - 1];
151b1e79510Slukem 
152b1e79510Slukem 			if (*cp == con && !isescaped(ptr, cp, esc)) {
1535a8030baSdrochner 				s--;	/* forget continuation char */
154b1e79510Slukem 				cnt = 1;
155b1e79510Slukem 			}
156b1e79510Slukem 		}
157b1e79510Slukem 
1585a8030baSdrochner 		if (s == 0) {
1595a8030baSdrochner 			/*
1605a8030baSdrochner 			 * nothing to add, skip realloc except in case
1615a8030baSdrochner 			 * we need a minimal buf to return an empty line
1625a8030baSdrochner 			 */
1635a8030baSdrochner 			if (cnt || buf != NULL)
164b1e79510Slukem 				continue;
1655a8030baSdrochner 		}
166b1e79510Slukem 
167b1e79510Slukem 		if ((cp = realloc(buf, len + s + 1)) == NULL) {
16864b5a4f2Sdrochner 			FUNLOCKFILE(fp);
169b1e79510Slukem 			free(buf);
170b1e79510Slukem 			return NULL;
171b1e79510Slukem 		}
172b1e79510Slukem 		buf = cp;
173b1e79510Slukem 
174b1e79510Slukem 		(void) memcpy(buf + len, ptr, s);
175b1e79510Slukem 		len += s;
176b1e79510Slukem 		buf[len] = '\0';
177b1e79510Slukem 	}
178b1e79510Slukem 
17964b5a4f2Sdrochner 	FUNLOCKFILE(fp);
18064b5a4f2Sdrochner 
181b1e79510Slukem 	if ((flags & FPARSELN_UNESCALL) != 0 && esc && buf != NULL &&
182b1e79510Slukem 	    strchr(buf, esc) != NULL) {
183b1e79510Slukem 		ptr = cp = buf;
184b1e79510Slukem 		while (cp[0] != '\0') {
185b1e79510Slukem 			int skipesc;
186b1e79510Slukem 
187b1e79510Slukem 			while (cp[0] != '\0' && cp[0] != esc)
188b1e79510Slukem 				*ptr++ = *cp++;
189b1e79510Slukem 			if (cp[0] == '\0' || cp[1] == '\0')
190b1e79510Slukem 				break;
191b1e79510Slukem 
192b1e79510Slukem 			skipesc = 0;
193b1e79510Slukem 			if (cp[1] == com)
194b1e79510Slukem 				skipesc += (flags & FPARSELN_UNESCCOMM);
195b1e79510Slukem 			if (cp[1] == con)
196b1e79510Slukem 				skipesc += (flags & FPARSELN_UNESCCONT);
197b1e79510Slukem 			if (cp[1] == esc)
198b1e79510Slukem 				skipesc += (flags & FPARSELN_UNESCESC);
199b1e79510Slukem 			if (cp[1] != com && cp[1] != con && cp[1] != esc)
200b1e79510Slukem 				skipesc = (flags & FPARSELN_UNESCREST);
201b1e79510Slukem 
202b1e79510Slukem 			if (skipesc)
203b1e79510Slukem 				cp++;
204b1e79510Slukem 			else
205b1e79510Slukem 				*ptr++ = *cp++;
206b1e79510Slukem 			*ptr++ = *cp++;
207b1e79510Slukem 		}
208b1e79510Slukem 		*ptr = '\0';
209b1e79510Slukem 		len = strlen(buf);
210b1e79510Slukem 	}
211b1e79510Slukem 
212b1e79510Slukem 	if (size)
213b1e79510Slukem 		*size = len;
214b1e79510Slukem 	return buf;
215b1e79510Slukem }
216b1e79510Slukem 
217b1e79510Slukem #ifdef TEST
218b1e79510Slukem 
219b1e79510Slukem int main(int, char **);
220b1e79510Slukem 
221b1e79510Slukem int
main(int argc,char ** argv)222b1e79510Slukem main(int argc, char **argv)
223b1e79510Slukem {
224b1e79510Slukem 	char   *ptr;
225b1e79510Slukem 	size_t	size, line;
226b1e79510Slukem 
227b1e79510Slukem 	line = 0;
228b1e79510Slukem 	while ((ptr = fparseln(stdin, &size, &line, NULL,
229b1e79510Slukem 	    FPARSELN_UNESCALL)) != NULL)
230b1e79510Slukem 		printf("line %d (%d) |%s|\n", line, size, ptr);
231b1e79510Slukem 	return 0;
232b1e79510Slukem }
233b1e79510Slukem 
234b1e79510Slukem /*
235b1e79510Slukem 
236b1e79510Slukem # This is a test
237b1e79510Slukem line 1
238b1e79510Slukem line 2 \
239b1e79510Slukem line 3 # Comment
240b1e79510Slukem line 4 \# Not comment \\\\
241b1e79510Slukem 
242b1e79510Slukem # And a comment \
243b1e79510Slukem line 5 \\\
244b1e79510Slukem line 6
245b1e79510Slukem 
246b1e79510Slukem */
247b1e79510Slukem 
248b1e79510Slukem #endif /* TEST */
2492af58f1cStnozaki #endif	/* ! HAVE_FPARSELN || BROKEN_FPARSELN */
250