1 /* $NetBSD: col.c,v 1.20 2021/09/10 21:52:17 rillig Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-3-Clause
5 *
6 * Copyright (c) 1990, 1993, 1994
7 * The Regents of the University of California. All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * Michael Rendell of the Memorial University of Newfoundland.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #include <sys/cdefs.h>
38 #ifndef lint
39 __COPYRIGHT("@(#) Copyright (c) 1990, 1993, 1994\
40 The Regents of the University of California. All rights reserved.");
41 #endif /* not lint */
42
43 #ifndef lint
44 #if 0
45 static char sccsid[] = "@(#)col.c 8.5 (Berkeley) 5/4/95";
46 __FBSDID("$FreeBSD: head/usr.bin/col/col.c 366577 2020-10-09 15:27:37Z markj $")
47 ;
48
49 #endif
50 __RCSID("$NetBSD: col.c,v 1.20 2021/09/10 21:52:17 rillig Exp $");
51 #endif /* not lint */
52
53 #include <err.h>
54 #include <errno.h>
55 #include <inttypes.h>
56 #include <limits.h>
57 #include <locale.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <termios.h>
62 #include <unistd.h>
63 #include <wchar.h>
64 #include <wctype.h>
65
66 #define BS '\b' /* backspace */
67 #define TAB '\t' /* tab */
68 #define SPACE ' ' /* space */
69 #define NL '\n' /* newline */
70 #define CR '\r' /* carriage return */
71 #define ESC '\033' /* escape */
72 #define SI '\017' /* shift in to normal character set */
73 #define SO '\016' /* shift out to alternate character set */
74 #define VT '\013' /* vertical tab (aka reverse line feed) */
75 #define RLF '7' /* ESC-7 reverse line feed */
76 #define RHLF '8' /* ESC-8 reverse half-line feed */
77 #define FHLF '9' /* ESC-9 forward half-line feed */
78
79 /* build up at least this many lines before flushing them out */
80 #define BUFFER_MARGIN 32
81
82 typedef char CSET;
83
84 typedef struct char_str {
85 #define CS_NORMAL 1
86 #define CS_ALTERNATE 2
87 int c_column; /* column character is in */
88 CSET c_set; /* character set (currently only 2) */
89 wchar_t c_char; /* character in question */
90 int c_width; /* character width */
91 } CHAR;
92
93 typedef struct line_str LINE;
94 struct line_str {
95 CHAR *l_line; /* characters on the line */
96 LINE *l_prev; /* previous line */
97 LINE *l_next; /* next line */
98 int l_lsize; /* allocated sizeof l_line */
99 int l_line_len; /* strlen(l_line) */
100 int l_needs_sort; /* set if chars went in out of order */
101 int l_max_col; /* max column in the line */
102 };
103
104 static void addto_lineno(int *, int);
105 static LINE *alloc_line(void);
106 static void dowarn(int);
107 static void flush_line(LINE *);
108 static void flush_lines(int);
109 static void flush_blanks(void);
110 static void free_line(LINE *);
111 __dead static void usage(void);
112
113 static CSET last_set; /* char_set of last char printed */
114 static LINE *lines;
115 static int compress_spaces; /* if doing space -> tab conversion */
116 static int fine; /* if `fine' resolution (half lines) */
117 static int max_bufd_lines; /* max # of half lines to keep in memory */
118 static int nblank_lines; /* # blanks after last flushed line */
119 static int no_backspaces; /* if not to output any backspaces */
120 static int pass_unknown_seqs; /* pass unknown control sequences */
121
122 #define PUTC(ch) \
123 do { \
124 if (putwchar(ch) == WEOF) \
125 errx(EXIT_FAILURE, "write error"); \
126 } while (0)
127
128 int
main(int argc,char ** argv)129 main(int argc, char **argv)
130 {
131 wint_t ch;
132 CHAR *c;
133 CSET cur_set; /* current character set */
134 LINE *l; /* current line */
135 int extra_lines; /* # of lines above first line */
136 int cur_col; /* current column */
137 int cur_line; /* line number of current position */
138 int max_line; /* max value of cur_line */
139 int this_line; /* line l points to */
140 int nflushd_lines; /* number of lines that were flushed */
141 int adjust, opt, warned, width;
142 int e;
143
144 (void)setlocale(LC_CTYPE, "");
145
146 max_bufd_lines = 256;
147 compress_spaces = 1; /* compress spaces into tabs */
148 while ((opt = getopt(argc, argv, "bfhl:px")) != -1)
149 switch (opt) {
150 case 'b': /* do not output backspaces */
151 no_backspaces = 1;
152 break;
153 case 'f': /* allow half forward line feeds */
154 fine = 1;
155 break;
156 case 'h': /* compress spaces into tabs */
157 compress_spaces = 1;
158 break;
159 case 'l': /* buffered line count */
160 max_bufd_lines = (int)strtoi(optarg, NULL, 0, 1,
161 (INT_MAX - BUFFER_MARGIN) / 2, &e) * 2;
162 if (e)
163 errc(EXIT_FAILURE, e, "bad -l argument `%s'",
164 optarg);
165 break;
166 case 'p': /* pass unknown control sequences */
167 pass_unknown_seqs = 1;
168 break;
169 case 'x': /* do not compress spaces into tabs */
170 compress_spaces = 0;
171 break;
172 case '?':
173 default:
174 usage();
175 }
176
177 if (optind != argc)
178 usage();
179
180 adjust = cur_col = extra_lines = warned = 0;
181 cur_line = max_line = nflushd_lines = this_line = 0;
182 cur_set = last_set = CS_NORMAL;
183 lines = l = alloc_line();
184
185 while ((ch = getwchar()) != WEOF) {
186 if (!iswgraph(ch)) {
187 switch (ch) {
188 case BS: /* can't go back further */
189 if (cur_col == 0)
190 continue;
191 --cur_col;
192 continue;
193 case CR:
194 cur_col = 0;
195 continue;
196 case ESC: /* just ignore EOF */
197 switch(getwchar()) {
198 /*
199 * In the input stream, accept both the
200 * XPG5 sequences ESC-digit and the
201 * traditional BSD sequences ESC-ctrl.
202 */
203 case '\007':
204 /* FALLTHROUGH */
205 case RLF:
206 addto_lineno(&cur_line, -2);
207 break;
208 case '\010':
209 /* FALLTHROUGH */
210 case RHLF:
211 addto_lineno(&cur_line, -1);
212 break;
213 case '\011':
214 /* FALLTHROUGH */
215 case FHLF:
216 addto_lineno(&cur_line, 1);
217 if (cur_line > max_line)
218 max_line = cur_line;
219 }
220 continue;
221 case NL:
222 addto_lineno(&cur_line, 2);
223 if (cur_line > max_line)
224 max_line = cur_line;
225 cur_col = 0;
226 continue;
227 case SPACE:
228 ++cur_col;
229 continue;
230 case SI:
231 cur_set = CS_NORMAL;
232 continue;
233 case SO:
234 cur_set = CS_ALTERNATE;
235 continue;
236 case TAB: /* adjust column */
237 cur_col |= 7;
238 ++cur_col;
239 continue;
240 case VT:
241 addto_lineno(&cur_line, -2);
242 continue;
243 }
244 if (iswspace(ch)) {
245 if ((width = wcwidth(ch)) > 0)
246 cur_col += width;
247 continue;
248 }
249 if (!pass_unknown_seqs)
250 continue;
251 }
252
253 /* Must stuff ch in a line - are we at the right one? */
254 if (cur_line + adjust != this_line) {
255 LINE *lnew;
256
257 /* round up to next line */
258 adjust = !fine && (cur_line & 1);
259
260 if (cur_line + adjust < this_line) {
261 while (cur_line + adjust < this_line &&
262 l->l_prev != NULL) {
263 l = l->l_prev;
264 this_line--;
265 }
266 if (cur_line + adjust < this_line) {
267 if (nflushd_lines == 0) {
268 /*
269 * Allow backup past first
270 * line if nothing has been
271 * flushed yet.
272 */
273 while (cur_line + adjust
274 < this_line) {
275 lnew = alloc_line();
276 l->l_prev = lnew;
277 lnew->l_next = l;
278 l = lines = lnew;
279 extra_lines++;
280 this_line--;
281 }
282 } else {
283 if (!warned++)
284 dowarn(cur_line);
285 cur_line = this_line - adjust;
286 }
287 }
288 } else {
289 /* may need to allocate here */
290 while (cur_line + adjust > this_line) {
291 if (l->l_next == NULL) {
292 l->l_next = alloc_line();
293 l->l_next->l_prev = l;
294 }
295 l = l->l_next;
296 this_line++;
297 }
298 }
299 if (this_line > nflushd_lines &&
300 this_line - nflushd_lines >=
301 max_bufd_lines + BUFFER_MARGIN) {
302 if (extra_lines) {
303 flush_lines(extra_lines);
304 extra_lines = 0;
305 }
306 flush_lines(this_line - nflushd_lines -
307 max_bufd_lines);
308 nflushd_lines = this_line - max_bufd_lines;
309 }
310 }
311 /* grow line's buffer? */
312 if (l->l_line_len + 1 >= l->l_lsize) {
313 int need;
314
315 need = l->l_lsize ? l->l_lsize * 2 : 90;
316 if ((l->l_line = realloc(l->l_line,
317 (unsigned)need * sizeof(CHAR))) == NULL)
318 err(EXIT_FAILURE, NULL);
319 l->l_lsize = need;
320 }
321 c = &l->l_line[l->l_line_len++];
322 c->c_char = ch;
323 c->c_set = cur_set;
324 c->c_column = cur_col;
325 c->c_width = wcwidth(ch);
326 /*
327 * If things are put in out of order, they will need sorting
328 * when it is flushed.
329 */
330 if (cur_col < l->l_max_col)
331 l->l_needs_sort = 1;
332 else
333 l->l_max_col = cur_col;
334 if (c->c_width > 0)
335 cur_col += c->c_width;
336 }
337 if (ferror(stdin))
338 err(EXIT_FAILURE, NULL);
339 if (extra_lines) {
340 /*
341 * Extra lines only exist if no lines have been flushed
342 * yet. This means that 'lines' must point to line zero
343 * after we flush the extra lines.
344 */
345 flush_lines(extra_lines);
346 l = lines;
347 this_line = 0;
348 }
349
350 /* goto the last line that had a character on it */
351 for (; l->l_next; l = l->l_next)
352 this_line++;
353 flush_lines(this_line - nflushd_lines + 1);
354
355 /* make sure we leave things in a sane state */
356 if (last_set != CS_NORMAL)
357 PUTC(SI);
358
359 /* flush out the last few blank lines */
360 if (max_line >= this_line)
361 nblank_lines = max_line - this_line + (max_line & 1);
362 if (nblank_lines == 0)
363 /* end with a newline even if the source doesn't */
364 nblank_lines = 2;
365 flush_blanks();
366 exit(EXIT_SUCCESS);
367 }
368
369 /*
370 * Prints the first 'nflush' lines. Printed lines are freed.
371 * After this function returns, 'lines' points to the first
372 * of the remaining lines, and 'nblank_lines' will have the
373 * number of half line feeds between the final flushed line
374 * and the first remaining line.
375 */
376 static void
flush_lines(int nflush)377 flush_lines(int nflush)
378 {
379 LINE *l;
380
381 while (--nflush >= 0) {
382 l = lines;
383 lines = l->l_next;
384 if (l->l_line) {
385 flush_blanks();
386 flush_line(l);
387 free(l->l_line);
388 }
389 if (l->l_next)
390 nblank_lines++;
391 free_line(l);
392 }
393 if (lines)
394 lines->l_prev = NULL;
395 }
396
397 /*
398 * Print a number of newline/half newlines.
399 * nblank_lines is the number of half line feeds.
400 */
401 static void
flush_blanks(void)402 flush_blanks(void)
403 {
404 int half, i, nb;
405
406 half = 0;
407 nb = nblank_lines;
408 if (nb & 1) {
409 if (fine)
410 half = 1;
411 else
412 nb++;
413 }
414 nb /= 2;
415 for (i = nb; --i >= 0;)
416 PUTC('\n');
417 if (half) {
418 PUTC(ESC);
419 PUTC(FHLF);
420 if (!nb)
421 PUTC('\r');
422 }
423 nblank_lines = 0;
424 }
425
426 /*
427 * Write a line to stdout taking care of space to tab conversion (-h flag)
428 * and character set shifts.
429 */
430 static void
flush_line(LINE * l)431 flush_line(LINE *l)
432 {
433 CHAR *c, *endc;
434 int i, j, nchars, last_col, save, this_col, tot;
435
436 last_col = 0;
437 nchars = l->l_line_len;
438
439 if (l->l_needs_sort) {
440 static CHAR *sorted;
441 static int count_size, *count, sorted_size;
442
443 /*
444 * Do an O(n) sort on l->l_line by column being careful to
445 * preserve the order of characters in the same column.
446 */
447 if (l->l_lsize > sorted_size) {
448 sorted_size = l->l_lsize;
449 if ((sorted = realloc(sorted,
450 sizeof(CHAR) * (size_t)sorted_size)) == NULL)
451 err(EXIT_FAILURE, NULL);
452 }
453 if (l->l_max_col >= count_size) {
454 count_size = l->l_max_col + 1;
455 if ((count = realloc(count,
456 sizeof(int) * (size_t)count_size)) == NULL)
457 err(EXIT_FAILURE, NULL);
458 }
459 memset(count, 0, sizeof(int) * (size_t)l->l_max_col + 1);
460 for (i = nchars, c = l->l_line; --i >= 0; c++)
461 count[c->c_column]++;
462
463 /*
464 * calculate running total (shifted down by 1) to use as
465 * indices into new line.
466 */
467 for (tot = 0, i = 0; i <= l->l_max_col; i++) {
468 save = count[i];
469 count[i] = tot;
470 tot += save;
471 }
472
473 for (i = nchars, c = l->l_line; --i >= 0; c++)
474 sorted[count[c->c_column]++] = *c;
475 c = sorted;
476 } else
477 c = l->l_line;
478 while (nchars > 0) {
479 this_col = c->c_column;
480 endc = c;
481 do {
482 ++endc;
483 } while (--nchars > 0 && this_col == endc->c_column);
484
485 /* if -b only print last character */
486 if (no_backspaces) {
487 c = endc - 1;
488 if (nchars > 0 &&
489 this_col + c->c_width > endc->c_column)
490 continue;
491 }
492
493 if (this_col > last_col) {
494 int nspace = this_col - last_col;
495
496 if (compress_spaces && nspace > 1) {
497 while (1) {
498 int tab_col, tab_size;
499
500 tab_col = (last_col + 8) & ~7;
501 if (tab_col > this_col)
502 break;
503 tab_size = tab_col - last_col;
504 if (tab_size == 1)
505 PUTC(' ');
506 else
507 PUTC('\t');
508 nspace -= tab_size;
509 last_col = tab_col;
510 }
511 }
512 while (--nspace >= 0)
513 PUTC(' ');
514 last_col = this_col;
515 }
516
517 for (;;) {
518 if (c->c_set != last_set) {
519 switch (c->c_set) {
520 case CS_NORMAL:
521 PUTC(SI);
522 break;
523 case CS_ALTERNATE:
524 PUTC(SO);
525 }
526 last_set = c->c_set;
527 }
528 PUTC(c->c_char);
529 if ((c + 1) < endc)
530 for (j = 0; j < c->c_width; j++)
531 PUTC('\b');
532 if (++c >= endc)
533 break;
534 }
535 last_col += (c - 1)->c_width;
536 }
537 }
538
539 /*
540 * Increment or decrement a line number, checking for overflow.
541 * Stop one below INT_MAX such that the adjust variable is safe.
542 */
543 void
addto_lineno(int * lno,int offset)544 addto_lineno(int *lno, int offset)
545 {
546 if (offset > 0) {
547 if (*lno >= INT_MAX - offset)
548 errx(EXIT_FAILURE, "too many lines");
549 } else {
550 if (*lno < INT_MIN - offset)
551 errx(EXIT_FAILURE, "too many reverse line feeds");
552 }
553 *lno += offset;
554 }
555
556 #define NALLOC 64
557
558 static LINE *line_freelist;
559
560 static LINE *
alloc_line(void)561 alloc_line(void)
562 {
563 LINE *l;
564 int i;
565
566 if (!line_freelist) {
567 if ((l = realloc(NULL, sizeof(LINE) * NALLOC)) == NULL)
568 err(EXIT_FAILURE, NULL);
569 line_freelist = l;
570 for (i = 1; i < NALLOC; i++, l++)
571 l->l_next = l + 1;
572 l->l_next = NULL;
573 }
574 l = line_freelist;
575 line_freelist = l->l_next;
576
577 memset(l, 0, sizeof(LINE));
578 return (l);
579 }
580
581 static void
free_line(LINE * l)582 free_line(LINE *l)
583 {
584
585 l->l_next = line_freelist;
586 line_freelist = l;
587 }
588
589 static void
usage(void)590 usage(void)
591 {
592
593 (void)fprintf(stderr, "Usage: %s [-bfhpx] [-l nline]\n", getprogname());
594 exit(EXIT_FAILURE);
595 }
596
597 static void
dowarn(int line)598 dowarn(int line)
599 {
600
601 warnx("warning: can't back up %s",
602 line < 0 ? "past first line" : "-- line already flushed");
603 }
604