xref: /netbsd-src/usr.bin/col/col.c (revision 133e932a29b69063cba00f2e422c587c98b25b82)
1 /*	$NetBSD: col.c,v 1.20 2021/09/10 21:52:17 rillig Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-3-Clause
5  *
6  * Copyright (c) 1990, 1993, 1994
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * This code is derived from software contributed to Berkeley by
10  * Michael Rendell of the Memorial University of Newfoundland.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #include <sys/cdefs.h>
38 #ifndef lint
39 __COPYRIGHT("@(#) Copyright (c) 1990, 1993, 1994\
40  The Regents of the University of California.  All rights reserved.");
41 #endif /* not lint */
42 
43 #ifndef lint
44 #if 0
45 static char sccsid[] = "@(#)col.c	8.5 (Berkeley) 5/4/95";
46 __FBSDID("$FreeBSD: head/usr.bin/col/col.c 366577 2020-10-09 15:27:37Z markj $")
47 ;
48 
49 #endif
50 __RCSID("$NetBSD: col.c,v 1.20 2021/09/10 21:52:17 rillig Exp $");
51 #endif /* not lint */
52 
53 #include <err.h>
54 #include <errno.h>
55 #include <inttypes.h>
56 #include <limits.h>
57 #include <locale.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <termios.h>
62 #include <unistd.h>
63 #include <wchar.h>
64 #include <wctype.h>
65 
66 #define	BS	'\b'		/* backspace */
67 #define	TAB	'\t'		/* tab */
68 #define	SPACE	' '		/* space */
69 #define	NL	'\n'		/* newline */
70 #define	CR	'\r'		/* carriage return */
71 #define	ESC	'\033'		/* escape */
72 #define	SI	'\017'		/* shift in to normal character set */
73 #define	SO	'\016'		/* shift out to alternate character set */
74 #define	VT	'\013'		/* vertical tab (aka reverse line feed) */
75 #define	RLF	'7'		/* ESC-7 reverse line feed */
76 #define	RHLF	'8'		/* ESC-8 reverse half-line feed */
77 #define	FHLF	'9'		/* ESC-9 forward half-line feed */
78 
79 /* build up at least this many lines before flushing them out */
80 #define	BUFFER_MARGIN		32
81 
82 typedef char CSET;
83 
84 typedef struct char_str {
85 #define	CS_NORMAL	1
86 #define	CS_ALTERNATE	2
87 	int		c_column;	/* column character is in */
88 	CSET		c_set;		/* character set (currently only 2) */
89 	wchar_t		c_char;		/* character in question */
90 	int		c_width;	/* character width */
91 } CHAR;
92 
93 typedef struct line_str LINE;
94 struct line_str {
95 	CHAR	*l_line;		/* characters on the line */
96 	LINE	*l_prev;		/* previous line */
97 	LINE	*l_next;		/* next line */
98 	int	l_lsize;		/* allocated sizeof l_line */
99 	int	l_line_len;		/* strlen(l_line) */
100 	int	l_needs_sort;		/* set if chars went in out of order */
101 	int	l_max_col;		/* max column in the line */
102 };
103 
104 static void	addto_lineno(int *, int);
105 static LINE	*alloc_line(void);
106 static void	dowarn(int);
107 static void	flush_line(LINE *);
108 static void	flush_lines(int);
109 static void	flush_blanks(void);
110 static void	free_line(LINE *);
111 __dead static void	usage(void);
112 
113 static CSET	last_set;		/* char_set of last char printed */
114 static LINE	*lines;
115 static int	compress_spaces;	/* if doing space -> tab conversion */
116 static int	fine;			/* if `fine' resolution (half lines) */
117 static int	max_bufd_lines;		/* max # of half lines to keep in memory */
118 static int	nblank_lines;		/* # blanks after last flushed line */
119 static int	no_backspaces;		/* if not to output any backspaces */
120 static int	pass_unknown_seqs;	/* pass unknown control sequences */
121 
122 #define	PUTC(ch) \
123 	do {					\
124 		if (putwchar(ch) == WEOF)	\
125 			errx(EXIT_FAILURE, "write error");	\
126 	} while (0)
127 
128 int
main(int argc,char ** argv)129 main(int argc, char **argv)
130 {
131 	wint_t ch;
132 	CHAR *c;
133 	CSET cur_set;			/* current character set */
134 	LINE *l;			/* current line */
135 	int extra_lines;		/* # of lines above first line */
136 	int cur_col;			/* current column */
137 	int cur_line;			/* line number of current position */
138 	int max_line;			/* max value of cur_line */
139 	int this_line;			/* line l points to */
140 	int nflushd_lines;		/* number of lines that were flushed */
141 	int adjust, opt, warned, width;
142 	int e;
143 
144 	(void)setlocale(LC_CTYPE, "");
145 
146 	max_bufd_lines = 256;
147 	compress_spaces = 1;		/* compress spaces into tabs */
148 	while ((opt = getopt(argc, argv, "bfhl:px")) != -1)
149 		switch (opt) {
150 		case 'b':		/* do not output backspaces */
151 			no_backspaces = 1;
152 			break;
153 		case 'f':		/* allow half forward line feeds */
154 			fine = 1;
155 			break;
156 		case 'h':		/* compress spaces into tabs */
157 			compress_spaces = 1;
158 			break;
159 		case 'l':		/* buffered line count */
160 			max_bufd_lines = (int)strtoi(optarg, NULL, 0, 1,
161 			    (INT_MAX - BUFFER_MARGIN) / 2, &e) * 2;
162 			if (e)
163 				errc(EXIT_FAILURE, e, "bad -l argument `%s'",
164 				    optarg);
165 			break;
166 		case 'p':		/* pass unknown control sequences */
167 			pass_unknown_seqs = 1;
168 			break;
169 		case 'x':		/* do not compress spaces into tabs */
170 			compress_spaces = 0;
171 			break;
172 		case '?':
173 		default:
174 			usage();
175 		}
176 
177 	if (optind != argc)
178 		usage();
179 
180 	adjust = cur_col = extra_lines = warned = 0;
181 	cur_line = max_line = nflushd_lines = this_line = 0;
182 	cur_set = last_set = CS_NORMAL;
183 	lines = l = alloc_line();
184 
185 	while ((ch = getwchar()) != WEOF) {
186 		if (!iswgraph(ch)) {
187 			switch (ch) {
188 			case BS:		/* can't go back further */
189 				if (cur_col == 0)
190 					continue;
191 				--cur_col;
192 				continue;
193 			case CR:
194 				cur_col = 0;
195 				continue;
196 			case ESC:		/* just ignore EOF */
197 				switch(getwchar()) {
198 				/*
199 				 * In the input stream, accept both the
200 				 * XPG5 sequences ESC-digit and the
201 				 * traditional BSD sequences ESC-ctrl.
202 				 */
203 				case '\007':
204 					/* FALLTHROUGH */
205 				case RLF:
206 					addto_lineno(&cur_line, -2);
207 					break;
208 				case '\010':
209 					/* FALLTHROUGH */
210 				case RHLF:
211 					addto_lineno(&cur_line, -1);
212 					break;
213 				case '\011':
214 					/* FALLTHROUGH */
215 				case FHLF:
216 					addto_lineno(&cur_line, 1);
217 					if (cur_line > max_line)
218 						max_line = cur_line;
219 				}
220 				continue;
221 			case NL:
222 				addto_lineno(&cur_line, 2);
223 				if (cur_line > max_line)
224 					max_line = cur_line;
225 				cur_col = 0;
226 				continue;
227 			case SPACE:
228 				++cur_col;
229 				continue;
230 			case SI:
231 				cur_set = CS_NORMAL;
232 				continue;
233 			case SO:
234 				cur_set = CS_ALTERNATE;
235 				continue;
236 			case TAB:		/* adjust column */
237 				cur_col |= 7;
238 				++cur_col;
239 				continue;
240 			case VT:
241 				addto_lineno(&cur_line, -2);
242 				continue;
243 			}
244 			if (iswspace(ch)) {
245 				if ((width = wcwidth(ch)) > 0)
246 					cur_col += width;
247 				continue;
248 			}
249 			if (!pass_unknown_seqs)
250 				continue;
251 		}
252 
253 		/* Must stuff ch in a line - are we at the right one? */
254 		if (cur_line + adjust != this_line) {
255 			LINE *lnew;
256 
257 			/* round up to next line */
258 			adjust = !fine && (cur_line & 1);
259 
260 			if (cur_line + adjust < this_line) {
261 				while (cur_line + adjust < this_line &&
262 				    l->l_prev != NULL) {
263 					l = l->l_prev;
264 					this_line--;
265 				}
266 				if (cur_line + adjust < this_line) {
267 					if (nflushd_lines == 0) {
268 						/*
269 						 * Allow backup past first
270 						 * line if nothing has been
271 						 * flushed yet.
272 						 */
273 						while (cur_line + adjust
274 						    < this_line) {
275 							lnew = alloc_line();
276 							l->l_prev = lnew;
277 							lnew->l_next = l;
278 							l = lines = lnew;
279 							extra_lines++;
280 							this_line--;
281 						}
282 					} else {
283 						if (!warned++)
284 							dowarn(cur_line);
285 						cur_line = this_line - adjust;
286 					}
287 				}
288 			} else {
289 				/* may need to allocate here */
290 				while (cur_line + adjust > this_line) {
291 					if (l->l_next == NULL) {
292 						l->l_next = alloc_line();
293 						l->l_next->l_prev = l;
294 					}
295 					l = l->l_next;
296 					this_line++;
297 				}
298 			}
299 			if (this_line > nflushd_lines &&
300 			    this_line - nflushd_lines >=
301 			    max_bufd_lines + BUFFER_MARGIN) {
302 				if (extra_lines) {
303 					flush_lines(extra_lines);
304 					extra_lines = 0;
305 				}
306 				flush_lines(this_line - nflushd_lines -
307 				    max_bufd_lines);
308 				nflushd_lines = this_line - max_bufd_lines;
309 			}
310 		}
311 		/* grow line's buffer? */
312 		if (l->l_line_len + 1 >= l->l_lsize) {
313 			int need;
314 
315 			need = l->l_lsize ? l->l_lsize * 2 : 90;
316 			if ((l->l_line = realloc(l->l_line,
317 			    (unsigned)need * sizeof(CHAR))) == NULL)
318 				err(EXIT_FAILURE, NULL);
319 			l->l_lsize = need;
320 		}
321 		c = &l->l_line[l->l_line_len++];
322 		c->c_char = ch;
323 		c->c_set = cur_set;
324 		c->c_column = cur_col;
325 		c->c_width = wcwidth(ch);
326 		/*
327 		 * If things are put in out of order, they will need sorting
328 		 * when it is flushed.
329 		 */
330 		if (cur_col < l->l_max_col)
331 			l->l_needs_sort = 1;
332 		else
333 			l->l_max_col = cur_col;
334 		if (c->c_width > 0)
335 			cur_col += c->c_width;
336 	}
337 	if (ferror(stdin))
338 		err(EXIT_FAILURE, NULL);
339 	if (extra_lines) {
340 		/*
341 		 * Extra lines only exist if no lines have been flushed
342 		 * yet. This means that 'lines' must point to line zero
343 		 * after we flush the extra lines.
344 		 */
345 		flush_lines(extra_lines);
346 		l = lines;
347 		this_line = 0;
348 	}
349 
350 	/* goto the last line that had a character on it */
351 	for (; l->l_next; l = l->l_next)
352 		this_line++;
353 	flush_lines(this_line - nflushd_lines + 1);
354 
355 	/* make sure we leave things in a sane state */
356 	if (last_set != CS_NORMAL)
357 		PUTC(SI);
358 
359 	/* flush out the last few blank lines */
360 	if (max_line >= this_line)
361 		nblank_lines = max_line - this_line + (max_line & 1);
362 	if (nblank_lines == 0)
363 		/* end with a newline even if the source doesn't */
364 		nblank_lines = 2;
365 	flush_blanks();
366 	exit(EXIT_SUCCESS);
367 }
368 
369 /*
370  * Prints the first 'nflush' lines. Printed lines are freed.
371  * After this function returns, 'lines' points to the first
372  * of the remaining lines, and 'nblank_lines' will have the
373  * number of half line feeds between the final flushed line
374  * and the first remaining line.
375  */
376 static void
flush_lines(int nflush)377 flush_lines(int nflush)
378 {
379 	LINE *l;
380 
381 	while (--nflush >= 0) {
382 		l = lines;
383 		lines = l->l_next;
384 		if (l->l_line) {
385 			flush_blanks();
386 			flush_line(l);
387 			free(l->l_line);
388 		}
389 		if (l->l_next)
390 			nblank_lines++;
391 		free_line(l);
392 	}
393 	if (lines)
394 		lines->l_prev = NULL;
395 }
396 
397 /*
398  * Print a number of newline/half newlines.
399  * nblank_lines is the number of half line feeds.
400  */
401 static void
flush_blanks(void)402 flush_blanks(void)
403 {
404 	int half, i, nb;
405 
406 	half = 0;
407 	nb = nblank_lines;
408 	if (nb & 1) {
409 		if (fine)
410 			half = 1;
411 		else
412 			nb++;
413 	}
414 	nb /= 2;
415 	for (i = nb; --i >= 0;)
416 		PUTC('\n');
417 	if (half) {
418 		PUTC(ESC);
419 		PUTC(FHLF);
420 		if (!nb)
421 			PUTC('\r');
422 	}
423 	nblank_lines = 0;
424 }
425 
426 /*
427  * Write a line to stdout taking care of space to tab conversion (-h flag)
428  * and character set shifts.
429  */
430 static void
flush_line(LINE * l)431 flush_line(LINE *l)
432 {
433 	CHAR *c, *endc;
434 	int i, j, nchars, last_col, save, this_col, tot;
435 
436 	last_col = 0;
437 	nchars = l->l_line_len;
438 
439 	if (l->l_needs_sort) {
440 		static CHAR *sorted;
441 		static int count_size, *count, sorted_size;
442 
443 		/*
444 		 * Do an O(n) sort on l->l_line by column being careful to
445 		 * preserve the order of characters in the same column.
446 		 */
447 		if (l->l_lsize > sorted_size) {
448 			sorted_size = l->l_lsize;
449 			if ((sorted = realloc(sorted,
450 			    sizeof(CHAR) * (size_t)sorted_size)) == NULL)
451 				err(EXIT_FAILURE, NULL);
452 		}
453 		if (l->l_max_col >= count_size) {
454 			count_size = l->l_max_col + 1;
455 			if ((count = realloc(count,
456 			    sizeof(int) * (size_t)count_size)) == NULL)
457 				err(EXIT_FAILURE, NULL);
458 		}
459 		memset(count, 0, sizeof(int) * (size_t)l->l_max_col + 1);
460 		for (i = nchars, c = l->l_line; --i >= 0; c++)
461 			count[c->c_column]++;
462 
463 		/*
464 		 * calculate running total (shifted down by 1) to use as
465 		 * indices into new line.
466 		 */
467 		for (tot = 0, i = 0; i <= l->l_max_col; i++) {
468 			save = count[i];
469 			count[i] = tot;
470 			tot += save;
471 		}
472 
473 		for (i = nchars, c = l->l_line; --i >= 0; c++)
474 			sorted[count[c->c_column]++] = *c;
475 		c = sorted;
476 	} else
477 		c = l->l_line;
478 	while (nchars > 0) {
479 		this_col = c->c_column;
480 		endc = c;
481 		do {
482 			++endc;
483 		} while (--nchars > 0 && this_col == endc->c_column);
484 
485 		/* if -b only print last character */
486 		if (no_backspaces) {
487 			c = endc - 1;
488 			if (nchars > 0 &&
489 			    this_col + c->c_width > endc->c_column)
490 				continue;
491 		}
492 
493 		if (this_col > last_col) {
494 			int nspace = this_col - last_col;
495 
496 			if (compress_spaces && nspace > 1) {
497 				while (1) {
498 					int tab_col, tab_size;
499 
500 					tab_col = (last_col + 8) & ~7;
501 					if (tab_col > this_col)
502 						break;
503 					tab_size = tab_col - last_col;
504 					if (tab_size == 1)
505 						PUTC(' ');
506 					else
507 						PUTC('\t');
508 					nspace -= tab_size;
509 					last_col = tab_col;
510 				}
511 			}
512 			while (--nspace >= 0)
513 				PUTC(' ');
514 			last_col = this_col;
515 		}
516 
517 		for (;;) {
518 			if (c->c_set != last_set) {
519 				switch (c->c_set) {
520 				case CS_NORMAL:
521 					PUTC(SI);
522 					break;
523 				case CS_ALTERNATE:
524 					PUTC(SO);
525 				}
526 				last_set = c->c_set;
527 			}
528 			PUTC(c->c_char);
529 			if ((c + 1) < endc)
530 				for (j = 0; j < c->c_width; j++)
531 					PUTC('\b');
532 			if (++c >= endc)
533 				break;
534 		}
535 		last_col += (c - 1)->c_width;
536 	}
537 }
538 
539 /*
540  * Increment or decrement a line number, checking for overflow.
541  * Stop one below INT_MAX such that the adjust variable is safe.
542  */
543 void
addto_lineno(int * lno,int offset)544 addto_lineno(int *lno, int offset)
545 {
546 	if (offset > 0) {
547 		if (*lno >= INT_MAX - offset)
548 			errx(EXIT_FAILURE, "too many lines");
549 	} else {
550 		if (*lno < INT_MIN - offset)
551 			errx(EXIT_FAILURE, "too many reverse line feeds");
552 	}
553 	*lno += offset;
554 }
555 
556 #define	NALLOC 64
557 
558 static LINE *line_freelist;
559 
560 static LINE *
alloc_line(void)561 alloc_line(void)
562 {
563 	LINE *l;
564 	int i;
565 
566 	if (!line_freelist) {
567 		if ((l = realloc(NULL, sizeof(LINE) * NALLOC)) == NULL)
568 			err(EXIT_FAILURE, NULL);
569 		line_freelist = l;
570 		for (i = 1; i < NALLOC; i++, l++)
571 			l->l_next = l + 1;
572 		l->l_next = NULL;
573 	}
574 	l = line_freelist;
575 	line_freelist = l->l_next;
576 
577 	memset(l, 0, sizeof(LINE));
578 	return (l);
579 }
580 
581 static void
free_line(LINE * l)582 free_line(LINE *l)
583 {
584 
585 	l->l_next = line_freelist;
586 	line_freelist = l;
587 }
588 
589 static void
usage(void)590 usage(void)
591 {
592 
593 	(void)fprintf(stderr, "Usage: %s [-bfhpx] [-l nline]\n", getprogname());
594 	exit(EXIT_FAILURE);
595 }
596 
597 static void
dowarn(int line)598 dowarn(int line)
599 {
600 
601 	warnx("warning: can't back up %s",
602 		line < 0 ? "past first line" : "-- line already flushed");
603 }
604