xref: /minix3/usr.bin/col/col.c (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1 /*	$NetBSD: col.c,v 1.18 2014/10/18 14:56:14 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1990, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Michael Rendell of the Memorial University of Newfoundland.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/cdefs.h>
36 #ifndef lint
37 __COPYRIGHT("@(#) Copyright (c) 1990, 1993, 1994\
38  The Regents of the University of California.  All rights reserved.");
39 #endif /* not lint */
40 
41 #ifndef lint
42 #if 0
43 static char sccsid[] = "@(#)col.c	8.5 (Berkeley) 5/4/95";
44 #endif
45 __RCSID("$NetBSD: col.c,v 1.18 2014/10/18 14:56:14 christos Exp $");
46 #endif /* not lint */
47 
48 #include <ctype.h>
49 #include <err.h>
50 #include <string.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <unistd.h>
54 
55 #define	BS	'\b'		/* backspace */
56 #define	TAB	'\t'		/* tab */
57 #define	SPACE	' '		/* space */
58 #define	NL	'\n'		/* newline */
59 #define	CR	'\r'		/* carriage return */
60 #define	ESC	'\033'		/* escape */
61 #define	SI	'\017'		/* shift in to normal character set */
62 #define	SO	'\016'		/* shift out to alternate character set */
63 #define	VT	'\013'		/* vertical tab (aka reverse line feed) */
64 #define	RLF	'\007'		/* ESC-07 reverse line feed */
65 #define	RHLF	'\010'		/* ESC-010 reverse half-line feed */
66 #define	FHLF	'\011'		/* ESC-011 forward half-line feed */
67 
68 /* build up at least this many lines before flushing them out */
69 #define	BUFFER_MARGIN		32
70 
71 typedef char CSET;
72 
73 typedef struct char_str {
74 #define	CS_NORMAL	1
75 #define	CS_ALTERNATE	2
76 	short		c_column;	/* column character is in */
77 	CSET		c_set;		/* character set (currently only 2) */
78 	char		c_char;		/* character in question */
79 } CHAR;
80 
81 typedef struct line_str LINE;
82 struct line_str {
83 	CHAR	*l_line;		/* characters on the line */
84 	LINE	*l_prev;		/* previous line */
85 	LINE	*l_next;		/* next line */
86 	int	l_lsize;		/* allocated sizeof l_line */
87 	int	l_line_len;		/* strlen(l_line) */
88 	int	l_needs_sort;		/* set if chars went in out of order */
89 	int	l_max_col;		/* max column in the line */
90 };
91 
92 static LINE   *alloc_line(void);
93 static void	dowarn(int);
94 static void	flush_line(LINE *);
95 static void	flush_lines(int);
96 static void	flush_blanks(void);
97 static void	free_line(LINE *);
98 __dead static void	usage(void);
99 __dead static void	wrerr(void);
100 static void   *xmalloc(void *, size_t);
101 
102 static CSET	last_set;		/* char_set of last char printed */
103 static LINE   *lines;
104 static int	compress_spaces;	/* if doing space -> tab conversion */
105 static int	fine;			/* if `fine' resolution (half lines) */
106 static int	max_bufd_lines;		/* max # lines to keep in memory */
107 static int	nblank_lines;		/* # blanks after last flushed line */
108 static int	no_backspaces;		/* if not to output any backspaces */
109 static int	pass_unknown_seqs;	/* whether to pass unknown control sequences */
110 
111 #define	PUTC(ch) \
112 	if (putchar(ch) == EOF) \
113 		wrerr();
114 
115 int
main(int argc,char ** argv)116 main(int argc, char **argv)
117 {
118 	int ch;
119 	CHAR *c;
120 	CSET cur_set;			/* current character set */
121 	LINE *l;			/* current line */
122 	int extra_lines;		/* # of lines above first line */
123 	int cur_col;			/* current column */
124 	int cur_line;			/* line number of current position */
125 	int max_line;			/* max value of cur_line */
126 	int this_line;			/* line l points to */
127 	int nflushd_lines;		/* number of lines that were flushed */
128 	int adjust, opt, warned;
129 
130 	max_bufd_lines = 128;
131 	compress_spaces = 1;		/* compress spaces into tabs */
132 	pass_unknown_seqs = 0;		/* remove unknown escape sequences */
133 	while ((opt = getopt(argc, argv, "bfhl:px")) != -1)
134 		switch (opt) {
135 		case 'b':		/* do not output backspaces */
136 			no_backspaces = 1;
137 			break;
138 		case 'f':		/* allow half forward line feeds */
139 			fine = 1;
140 			break;
141 		case 'h':		/* compress spaces into tabs */
142 			compress_spaces = 1;
143 			break;
144 		case 'l':		/* buffered line count */
145 			if ((max_bufd_lines = atoi(optarg)) <= 0) {
146 				(void)fprintf(stderr,
147 				    "col: bad -l argument %s.\n", optarg);
148 				exit(EXIT_FAILURE);
149 			}
150 			break;
151 		case 'p':		/* pass unknown control sequences */
152 			pass_unknown_seqs = 1;
153 			break;
154 		case 'x':		/* do not compress spaces into tabs */
155 			compress_spaces = 0;
156 			break;
157 		case '?':
158 		default:
159 			usage();
160 		}
161 
162 	if (optind != argc)
163 		usage();
164 
165 	/* this value is in half lines */
166 	max_bufd_lines *= 2;
167 
168 	adjust = cur_col = extra_lines = warned = 0;
169 	cur_line = max_line = nflushd_lines = this_line = 0;
170 	cur_set = last_set = CS_NORMAL;
171 	lines = l = alloc_line();
172 
173 	while ((ch = getchar()) != EOF) {
174 		if (!isgraph(ch)) {
175 			switch (ch) {
176 			case BS:		/* can't go back further */
177 				if (cur_col == 0)
178 					continue;
179 				--cur_col;
180 				continue;
181 			case CR:
182 				cur_col = 0;
183 				continue;
184 			case ESC:		/* just ignore EOF */
185 				switch(getchar()) {
186 				case RLF:
187 					cur_line -= 2;
188 					break;
189 				case RHLF:
190 					cur_line--;
191 					break;
192 				case FHLF:
193 					cur_line++;
194 					if (cur_line > max_line)
195 						max_line = cur_line;
196 				}
197 				continue;
198 			case NL:
199 				cur_line += 2;
200 				if (cur_line > max_line)
201 					max_line = cur_line;
202 				cur_col = 0;
203 				continue;
204 			case SPACE:
205 				++cur_col;
206 				continue;
207 			case SI:
208 				cur_set = CS_NORMAL;
209 				continue;
210 			case SO:
211 				cur_set = CS_ALTERNATE;
212 				continue;
213 			case TAB:		/* adjust column */
214 				cur_col |= 7;
215 				++cur_col;
216 				continue;
217 			case VT:
218 				cur_line -= 2;
219 				continue;
220 			}
221 			if (!pass_unknown_seqs)
222 				continue;
223 		}
224 
225 		/* Must stuff ch in a line - are we at the right one? */
226 		if (cur_line != this_line - adjust) {
227 			LINE *lnew;
228 			int nmove;
229 
230 			adjust = 0;
231 			nmove = cur_line - this_line;
232 			if (!fine) {
233 				/* round up to next line */
234 				if (cur_line & 1) {
235 					adjust = 1;
236 					nmove++;
237 				}
238 			}
239 			if (nmove < 0) {
240 				for (; nmove < 0 && l->l_prev; nmove++)
241 					l = l->l_prev;
242 				if (nmove) {
243 					if (nflushd_lines == 0) {
244 						/*
245 						 * Allow backup past first
246 						 * line if nothing has been
247 						 * flushed yet.
248 						 */
249 						for (; nmove < 0; nmove++) {
250 							lnew = alloc_line();
251 							l->l_prev = lnew;
252 							lnew->l_next = l;
253 							l = lines = lnew;
254 							extra_lines++;
255 						}
256 					} else {
257 						if (!warned++)
258 							dowarn(cur_line);
259 						cur_line -= nmove;
260 					}
261 				}
262 			} else {
263 				/* may need to allocate here */
264 				for (; nmove > 0 && l->l_next; nmove--)
265 					l = l->l_next;
266 				for (; nmove > 0; nmove--) {
267 					lnew = alloc_line();
268 					lnew->l_prev = l;
269 					l->l_next = lnew;
270 					l = lnew;
271 				}
272 			}
273 			this_line = cur_line + adjust;
274 			nmove = this_line - nflushd_lines;
275 			if (nmove >= max_bufd_lines + BUFFER_MARGIN) {
276 				nflushd_lines += nmove - max_bufd_lines;
277 				flush_lines(nmove - max_bufd_lines);
278 			}
279 		}
280 		/* grow line's buffer? */
281 		if (l->l_line_len + 1 >= l->l_lsize) {
282 			int need;
283 
284 			need = l->l_lsize ? l->l_lsize * 2 : 90;
285 			l->l_line = (CHAR *)xmalloc((void *) l->l_line,
286 			    (unsigned) need * sizeof(CHAR));
287 			l->l_lsize = need;
288 		}
289 		c = &l->l_line[l->l_line_len++];
290 		c->c_char = ch;
291 		c->c_set = cur_set;
292 		c->c_column = cur_col;
293 		/*
294 		 * If things are put in out of order, they will need sorting
295 		 * when it is flushed.
296 		 */
297 		if (cur_col < l->l_max_col)
298 			l->l_needs_sort = 1;
299 		else
300 			l->l_max_col = cur_col;
301 		cur_col++;
302 	}
303 	if (max_line == 0)
304 		exit(EXIT_SUCCESS);	/* no lines, so just exit */
305 
306 	/* goto the last line that had a character on it */
307 	for (; l->l_next; l = l->l_next)
308 		this_line++;
309 	flush_lines(this_line - nflushd_lines + extra_lines + 1);
310 
311 	/* make sure we leave things in a sane state */
312 	if (last_set != CS_NORMAL)
313 		PUTC('\017');
314 
315 	/* flush out the last few blank lines */
316 	nblank_lines = max_line - this_line;
317 	if (max_line & 1)
318 		nblank_lines++;
319 	else if (!nblank_lines)
320 		/* missing a \n on the last line? */
321 		nblank_lines = 2;
322 	flush_blanks();
323 	exit(EXIT_SUCCESS);
324 	/* NOTREACHED */
325 }
326 
327 static void
flush_lines(int nflush)328 flush_lines(int nflush)
329 {
330 	LINE *l;
331 
332 	while (--nflush >= 0) {
333 		l = lines;
334 		lines = l->l_next;
335 		if (l->l_line) {
336 			flush_blanks();
337 			flush_line(l);
338 		}
339 		nblank_lines++;
340 		if (l->l_line)
341 			(void)free((void *)l->l_line);
342 		free_line(l);
343 	}
344 	if (lines)
345 		lines->l_prev = NULL;
346 }
347 
348 /*
349  * Print a number of newline/half newlines.  If fine flag is set, nblank_lines
350  * is the number of half line feeds, otherwise it is the number of whole line
351  * feeds.
352  */
353 static void
flush_blanks(void)354 flush_blanks(void)
355 {
356 	int half, i, nb;
357 
358 	half = 0;
359 	nb = nblank_lines;
360 	if (nb & 1) {
361 		if (fine)
362 			half = 1;
363 		else
364 			nb++;
365 	}
366 	nb /= 2;
367 	for (i = nb; --i >= 0;)
368 		PUTC('\n');
369 	if (half) {
370 		PUTC('\033');
371 		PUTC('\011');
372 		if (!nb)
373 			PUTC('\r');
374 	}
375 	nblank_lines = 0;
376 }
377 
378 /*
379  * Write a line to stdout taking care of space to tab conversion (-h flag)
380  * and character set shifts.
381  */
382 static void
flush_line(LINE * l)383 flush_line(LINE *l)
384 {
385 	CHAR *c, *endc;
386 	int nchars, last_col, this_col;
387 
388 	last_col = 0;
389 	nchars = l->l_line_len;
390 
391 	if (l->l_needs_sort) {
392 		static CHAR *sorted;
393 		static int count_size, *count, i, save, sorted_size, tot;
394 
395 		/*
396 		 * Do an O(n) sort on l->l_line by column being careful to
397 		 * preserve the order of characters in the same column.
398 		 */
399 		if (l->l_lsize > sorted_size) {
400 			sorted_size = l->l_lsize;
401 			sorted = (CHAR *)xmalloc((void *)sorted,
402 			    (unsigned)sizeof(CHAR) * sorted_size);
403 		}
404 		if (l->l_max_col >= count_size) {
405 			count_size = l->l_max_col + 1;
406 			count = (int *)xmalloc((void *)count,
407 			    (unsigned)sizeof(int) * count_size);
408 		}
409 		(void)memset(count, 0, sizeof(int) * l->l_max_col + 1);
410 		for (i = nchars, c = l->l_line; --i >= 0; c++)
411 			count[c->c_column]++;
412 
413 		/*
414 		 * calculate running total (shifted down by 1) to use as
415 		 * indices into new line.
416 		 */
417 		for (tot = 0, i = 0; i <= l->l_max_col; i++) {
418 			save = count[i];
419 			count[i] = tot;
420 			tot += save;
421 		}
422 
423 		for (i = nchars, c = l->l_line; --i >= 0; c++)
424 			sorted[count[c->c_column]++] = *c;
425 		c = sorted;
426 	} else
427 		c = l->l_line;
428 	while (nchars > 0) {
429 		this_col = c->c_column;
430 		endc = c;
431 		do {
432 			++endc;
433 		} while (--nchars > 0 && this_col == endc->c_column);
434 
435 		/* if -b only print last character */
436 		if (no_backspaces)
437 			c = endc - 1;
438 
439 		if (this_col > last_col) {
440 			int nspace = this_col - last_col;
441 
442 			if (compress_spaces && nspace > 1) {
443 				int ntabs;
444 
445 				ntabs = ((last_col % 8) + nspace) / 8;
446 				if (ntabs) {
447 					nspace -= (ntabs * 8) - (last_col % 8);
448 					while (--ntabs >= 0)
449 						PUTC('\t');
450 				}
451 			}
452 			while (--nspace >= 0)
453 				PUTC(' ');
454 			last_col = this_col;
455 		}
456 		last_col++;
457 
458 		for (;;) {
459 			if (c->c_set != last_set) {
460 				switch (c->c_set) {
461 				case CS_NORMAL:
462 					PUTC('\017');
463 					break;
464 				case CS_ALTERNATE:
465 					PUTC('\016');
466 				}
467 				last_set = c->c_set;
468 			}
469 			PUTC(c->c_char);
470 			if (++c >= endc)
471 				break;
472 			PUTC('\b');
473 		}
474 	}
475 }
476 
477 #define	NALLOC 64
478 
479 static LINE *line_freelist;
480 
481 static LINE *
alloc_line(void)482 alloc_line(void)
483 {
484 	LINE *l;
485 	int i;
486 
487 	if (!line_freelist) {
488 		l = (LINE *)xmalloc(NULL, sizeof(LINE) * NALLOC);
489 		line_freelist = l;
490 		for (i = 1; i < NALLOC; i++, l++)
491 			l->l_next = l + 1;
492 		l->l_next = NULL;
493 	}
494 	l = line_freelist;
495 	line_freelist = l->l_next;
496 
497 	(void)memset(l, 0, sizeof(LINE));
498 	return (l);
499 }
500 
501 static void
free_line(LINE * l)502 free_line(LINE *l)
503 {
504 
505 	l->l_next = line_freelist;
506 	line_freelist = l;
507 }
508 
509 static void *
xmalloc(void * p,size_t size)510 xmalloc(void *p, size_t size)
511 {
512 	void *q;
513 
514 	if (!(q = (void *)realloc(p, size)))
515 		err(EXIT_FAILURE, "realloc");
516 	p = q;
517 	return (p);
518 }
519 
520 static void
usage(void)521 usage(void)
522 {
523 
524 	(void)fprintf(stderr, "usage: col [-bfpx] [-l nline]\n");
525 	exit(EXIT_FAILURE);
526 }
527 
528 static void
wrerr(void)529 wrerr(void)
530 {
531 
532 	(void)fprintf(stderr, "col: write error.\n");
533 	exit(EXIT_FAILURE);
534 }
535 
536 static void
dowarn(int line)537 dowarn(int line)
538 {
539 
540 	warnx("warning: can't back up %s",
541 		line < 0 ? "past first line" : "-- line already flushed");
542 }
543