xref: /minix3/external/bsd/libarchive/dist/libarchive_fe/line_reader.c (revision 543adbed3a3a783ed36434adafbc258b6bde442d)
1*543adbedSBen Gras /*-
2*543adbedSBen Gras  * Copyright (c) 2008 Tim Kientzle
3*543adbedSBen Gras  * All rights reserved.
4*543adbedSBen Gras  *
5*543adbedSBen Gras  * Redistribution and use in source and binary forms, with or without
6*543adbedSBen Gras  * modification, are permitted provided that the following conditions
7*543adbedSBen Gras  * are met:
8*543adbedSBen Gras  * 1. Redistributions of source code must retain the above copyright
9*543adbedSBen Gras  *    notice, this list of conditions and the following disclaimer
10*543adbedSBen Gras  *    in this position and unchanged.
11*543adbedSBen Gras  * 2. Redistributions in binary form must reproduce the above copyright
12*543adbedSBen Gras  *    notice, this list of conditions and the following disclaimer in the
13*543adbedSBen Gras  *    documentation and/or other materials provided with the distribution.
14*543adbedSBen Gras  *
15*543adbedSBen Gras  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16*543adbedSBen Gras  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17*543adbedSBen Gras  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18*543adbedSBen Gras  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19*543adbedSBen Gras  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20*543adbedSBen Gras  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21*543adbedSBen Gras  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22*543adbedSBen Gras  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23*543adbedSBen Gras  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24*543adbedSBen Gras  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25*543adbedSBen Gras  */
26*543adbedSBen Gras 
27*543adbedSBen Gras #include "lafe_platform.h"
28*543adbedSBen Gras __FBSDID("$FreeBSD$");
29*543adbedSBen Gras 
30*543adbedSBen Gras #include <errno.h>
31*543adbedSBen Gras #include <stdio.h>
32*543adbedSBen Gras #include <stdlib.h>
33*543adbedSBen Gras #include <string.h>
34*543adbedSBen Gras 
35*543adbedSBen Gras #include "err.h"
36*543adbedSBen Gras #include "line_reader.h"
37*543adbedSBen Gras 
38*543adbedSBen Gras #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__BORLANDC__)
39*543adbedSBen Gras #define strdup _strdup
40*543adbedSBen Gras #endif
41*543adbedSBen Gras 
/*
 * Read lines from a file and hand them back one at a time.  If
 * nullSeparator is set, lines are terminated with zero bytes (and empty
 * lines are returned); otherwise a line ends at any LF (0x0a) or CR
 * (0x0d) byte and empty lines are skipped.
 *
 * This uses a self-sizing buffer to handle arbitrarily-long lines.
 */
struct lafe_line_reader {
	FILE *f;		/* Input stream; set to NULL once EOF is seen. */
	char *buff;		/* Start of the heap-allocated read buffer. */
	char *buff_end;		/* One past the last valid byte in buff. */
	char *line_start;	/* First byte of the line being assembled. */
	char *line_end;		/* Scan position while looking for a terminator. */
	char *p;		/* Not referenced by the functions in this file. */
	char *pathname;		/* Private copy of the name, for error messages. */
	size_t buff_length;	/* Allocated size of buff, in bytes. */
	int nullSeparator;	/* Lines separated by null, not CR/CRLF/etc. */
	int ret;		/* Not referenced by the functions in this file. */
};
57*543adbedSBen Gras 
58*543adbedSBen Gras struct lafe_line_reader *
lafe_line_reader(const char * pathname,int nullSeparator)59*543adbedSBen Gras lafe_line_reader(const char *pathname, int nullSeparator)
60*543adbedSBen Gras {
61*543adbedSBen Gras 	struct lafe_line_reader *lr;
62*543adbedSBen Gras 
63*543adbedSBen Gras 	lr = calloc(1, sizeof(*lr));
64*543adbedSBen Gras 	if (lr == NULL)
65*543adbedSBen Gras 		lafe_errc(1, ENOMEM, "Can't open %s", pathname);
66*543adbedSBen Gras 
67*543adbedSBen Gras 	lr->nullSeparator = nullSeparator;
68*543adbedSBen Gras 	lr->pathname = strdup(pathname);
69*543adbedSBen Gras 
70*543adbedSBen Gras 	if (strcmp(pathname, "-") == 0)
71*543adbedSBen Gras 		lr->f = stdin;
72*543adbedSBen Gras 	else
73*543adbedSBen Gras 		lr->f = fopen(pathname, "r");
74*543adbedSBen Gras 	if (lr->f == NULL)
75*543adbedSBen Gras 		lafe_errc(1, errno, "Couldn't open %s", pathname);
76*543adbedSBen Gras 	lr->buff_length = 8192;
77*543adbedSBen Gras 	lr->buff = malloc(lr->buff_length);
78*543adbedSBen Gras 	if (lr->buff == NULL)
79*543adbedSBen Gras 		lafe_errc(1, ENOMEM, "Can't read %s", pathname);
80*543adbedSBen Gras 	lr->line_start = lr->line_end = lr->buff_end = lr->buff;
81*543adbedSBen Gras 
82*543adbedSBen Gras 	return (lr);
83*543adbedSBen Gras }
84*543adbedSBen Gras 
85*543adbedSBen Gras const char *
lafe_line_reader_next(struct lafe_line_reader * lr)86*543adbedSBen Gras lafe_line_reader_next(struct lafe_line_reader *lr)
87*543adbedSBen Gras {
88*543adbedSBen Gras 	size_t bytes_wanted, bytes_read, new_buff_size;
89*543adbedSBen Gras 	char *line_start, *p;
90*543adbedSBen Gras 
91*543adbedSBen Gras 	for (;;) {
92*543adbedSBen Gras 		/* If there's a line in the buffer, return it immediately. */
93*543adbedSBen Gras 		while (lr->line_end < lr->buff_end) {
94*543adbedSBen Gras 			if (lr->nullSeparator) {
95*543adbedSBen Gras 				if (*lr->line_end == '\0') {
96*543adbedSBen Gras 					line_start = lr->line_start;
97*543adbedSBen Gras 					lr->line_start = lr->line_end + 1;
98*543adbedSBen Gras 					lr->line_end = lr->line_start;
99*543adbedSBen Gras 					return (line_start);
100*543adbedSBen Gras 				}
101*543adbedSBen Gras 			} else if (*lr->line_end == '\x0a' || *lr->line_end == '\x0d') {
102*543adbedSBen Gras 				*lr->line_end = '\0';
103*543adbedSBen Gras 				line_start = lr->line_start;
104*543adbedSBen Gras 				lr->line_start = lr->line_end + 1;
105*543adbedSBen Gras 				lr->line_end = lr->line_start;
106*543adbedSBen Gras 				if (line_start[0] != '\0')
107*543adbedSBen Gras 					return (line_start);
108*543adbedSBen Gras 			}
109*543adbedSBen Gras 			lr->line_end++;
110*543adbedSBen Gras 		}
111*543adbedSBen Gras 
112*543adbedSBen Gras 		/* If we're at end-of-file, process the final data. */
113*543adbedSBen Gras 		if (lr->f == NULL) {
114*543adbedSBen Gras 			/* If there's more text, return one last line. */
115*543adbedSBen Gras 			if (lr->line_end > lr->line_start) {
116*543adbedSBen Gras 				*lr->line_end = '\0';
117*543adbedSBen Gras 				line_start = lr->line_start;
118*543adbedSBen Gras 				lr->line_start = lr->line_end + 1;
119*543adbedSBen Gras 				lr->line_end = lr->line_start;
120*543adbedSBen Gras 				return (line_start);
121*543adbedSBen Gras 			}
122*543adbedSBen Gras 			/* Otherwise, we're done. */
123*543adbedSBen Gras 			return (NULL);
124*543adbedSBen Gras 		}
125*543adbedSBen Gras 
126*543adbedSBen Gras 		/* Buffer only has part of a line. */
127*543adbedSBen Gras 		if (lr->line_start > lr->buff) {
128*543adbedSBen Gras 			/* Move a leftover fractional line to the beginning. */
129*543adbedSBen Gras 			memmove(lr->buff, lr->line_start,
130*543adbedSBen Gras 			    lr->buff_end - lr->line_start);
131*543adbedSBen Gras 			lr->buff_end -= lr->line_start - lr->buff;
132*543adbedSBen Gras 			lr->line_end -= lr->line_start - lr->buff;
133*543adbedSBen Gras 			lr->line_start = lr->buff;
134*543adbedSBen Gras 		} else {
135*543adbedSBen Gras 			/* Line is too big; enlarge the buffer. */
136*543adbedSBen Gras 			new_buff_size = lr->buff_length * 2;
137*543adbedSBen Gras 			if (new_buff_size <= lr->buff_length)
138*543adbedSBen Gras 				lafe_errc(1, ENOMEM,
139*543adbedSBen Gras 				    "Line too long in %s", lr->pathname);
140*543adbedSBen Gras 			lr->buff_length = new_buff_size;
141*543adbedSBen Gras 			p = realloc(lr->buff, new_buff_size);
142*543adbedSBen Gras 			if (p == NULL)
143*543adbedSBen Gras 				lafe_errc(1, ENOMEM,
144*543adbedSBen Gras 				    "Line too long in %s", lr->pathname);
145*543adbedSBen Gras 			lr->buff_end = p + (lr->buff_end - lr->buff);
146*543adbedSBen Gras 			lr->line_end = p + (lr->line_end - lr->buff);
147*543adbedSBen Gras 			lr->line_start = lr->buff = p;
148*543adbedSBen Gras 		}
149*543adbedSBen Gras 
150*543adbedSBen Gras 		/* Get some more data into the buffer. */
151*543adbedSBen Gras 		bytes_wanted = lr->buff + lr->buff_length - lr->buff_end;
152*543adbedSBen Gras 		bytes_read = fread(lr->buff_end, 1, bytes_wanted, lr->f);
153*543adbedSBen Gras 		lr->buff_end += bytes_read;
154*543adbedSBen Gras 
155*543adbedSBen Gras 		if (ferror(lr->f))
156*543adbedSBen Gras 			lafe_errc(1, errno, "Can't read %s", lr->pathname);
157*543adbedSBen Gras 		if (feof(lr->f)) {
158*543adbedSBen Gras 			if (lr->f != stdin)
159*543adbedSBen Gras 				fclose(lr->f);
160*543adbedSBen Gras 			lr->f = NULL;
161*543adbedSBen Gras 		}
162*543adbedSBen Gras 	}
163*543adbedSBen Gras }
164*543adbedSBen Gras 
165*543adbedSBen Gras void
lafe_line_reader_free(struct lafe_line_reader * lr)166*543adbedSBen Gras lafe_line_reader_free(struct lafe_line_reader *lr)
167*543adbedSBen Gras {
168*543adbedSBen Gras 	free(lr->buff);
169*543adbedSBen Gras 	free(lr->pathname);
170*543adbedSBen Gras 	free(lr);
171*543adbedSBen Gras }
172