xref: /openbsd-src/usr.bin/ul/ul.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*	$OpenBSD: ul.c,v 1.21 2016/03/26 08:59:29 natano Exp $	*/
2 /*	$NetBSD: ul.c,v 1.3 1994/12/07 00:28:24 jtc Exp $	*/
3 
4 /*
5  * Copyright (c) 1980, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <curses.h>
34 #include <err.h>
35 #include <errno.h>
36 #include <locale.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <term.h>
41 #include <unistd.h>
42 #include <wchar.h>
43 
/*
 * Input characters that mfilter() treats specially.  SO and SI switch
 * the ALTSET bit of the current mode on and off.  IESC introduces a
 * two-character sequence whose second character must be HFWD, HREV or
 * FREV; anything else makes mfilter() exit with an error.
 *
 * HFWD and HREV step halfpos forward/backward and toggle the SUBSC and
 * SUPERSC bits, calling fwd() or reverse() once the half steps add up
 * to a whole line.  FREV calls reverse() directly.
 *
 * MAXBUF is the number of cells in obuf and also the limit on col in
 * mfilter()'s read loop.
 */
#define	IESC	L'\033'
#define	SO	L'\016'
#define	SI	L'\017'
#define	HFWD	'9'
#define	HREV	'8'
#define	FREV	'7'
#define	MAXBUF	512

/*
 * Mode bits kept in the global "mode" and in each CHAR.c_mode.
 * NORMAL must stay zero: initbuf() clears obuf wholesale and relies on
 * that producing NORMAL cells.
 *
 * NOTE(review): msetmode() emits underline+dim for SUPERSC and dim
 * alone for SUBSC, which is the opposite of the annotations on those
 * two lines below -- confirm which one is intended.
 */
#define	NORMAL	000
#define	ALTSET	001	/* Reverse */
#define	SUPERSC	002	/* Dim */
#define	SUBSC	004	/* Dim | Ul */
#define	UNDERL	010	/* Ul */
#define	BOLD	020	/* Bold */
#define	INDET	040	/* Indeterminate: either Bold or Ul */
59 
/*
 * must_use_uc is computed by initcap(): the terminal has an underline
 * character capability but no enter-underline capability.
 * must_overstrike is set by main() when attributes have to be produced
 * by a second overstruck pass (see overstrike()).
 */
int	must_use_uc, must_overstrike;
/*
 * Terminal capability strings filled in by initcap().  Any of them may
 * be NULL; PRINT() skips NULL strings.
 */
char	*CURS_UP, *CURS_RIGHT, *CURS_LEFT,
	*ENTER_STANDOUT, *EXIT_STANDOUT, *ENTER_UNDERLINE, *EXIT_UNDERLINE,
	*ENTER_DIM, *ENTER_BOLD, *ENTER_REVERSE, *UNDER_CHAR, *EXIT_ATTRIBUTES;

/*
 * One character cell of the line being assembled.
 *   c_mode   mode bits (NORMAL, ALTSET, ... INDET) for this cell
 *   c_char   the character; L'\0' means nothing has been stored yet
 *   c_width  display width: wcwidth() of the character, or 1 for
 *            blanks, underscores and tab fill
 *   c_pos    running sum of widths up to and including this cell;
 *            0 means no position has been assigned yet
 */
struct	CHAR	{
	char	c_mode;
	wchar_t	c_char;
	int	c_width;
	int	c_pos;
} ;

/*
 * The current output line.  Cells are used from index 1; cell 0 is only
 * ever read (as obuf[col - 1]) and so supplies a starting c_pos of 0.
 */
struct	CHAR	obuf[MAXBUF];
/* col: cell the next character goes into; maxcol: one past the highest cell used. */
int	col, maxcol;
/* Mode bits applied to characters as they are read. */
int	mode;
/* Net count of half-line steps: >0 after HFWD, <0 after HREV. */
int	halfpos;
/*
 * Raised by reverse(), lowered by one in each flushln().  While it is
 * non-zero flushln() emits CURS_RIGHT for empty cells instead of a space.
 */
int	upln;
/* Set by the -i option: report attributes on a separate line via iattr(). */
int	iflag;
78 
/* Forward declarations for the functions defined later in this file. */
int	outchar(int);
void	initcap(void);
void	initbuf(void);
void	mfilter(FILE *);
void	reverse(void);
void	fwd(void);
void	flushln(void);
void	msetmode(int);
void	outc(wchar_t, int);
void	overstrike(void);
void	iattr(void);

/*
 * Send the capability string s to the terminal through tputs(), using
 * outchar() as the per-character output routine.  A NULL s is ignored.
 * Note that s is evaluated twice.
 */
#define	PRINT(s) \
	do { \
		if (s) \
			tputs(s, 1, outchar); \
	} while (0)
96 
97 int
98 main(int argc, char *argv[])
99 {
100 	extern int optind;
101 	extern char *optarg;
102 	int c;
103 	char *termtype;
104 	FILE *f;
105 	char termcap[1024];
106 
107 	setlocale(LC_CTYPE, "");
108 
109 	if (pledge("stdio rpath tty", NULL) == -1)
110 		err(1, "pledge");
111 
112 	termtype = getenv("TERM");
113 	if (termtype == NULL || (argv[0][0] == 'c' && !isatty(1)))
114 		termtype = "lpr";
115 	while ((c = getopt(argc, argv, "it:T:")) != -1)
116 		switch (c) {
117 		case 't':
118 		case 'T': /* for nroff compatibility */
119 			termtype = optarg;
120 			break;
121 		case 'i':
122 			iflag = 1;
123 			break;
124 
125 		default:
126 			fprintf(stderr,
127 			    "usage: %s [-i] [-t terminal] [file ...]\n",
128 			    argv[0]);
129 			exit(1);
130 		}
131 
132 	switch (tgetent(termcap, termtype)) {
133 	case 1:
134 		break;
135 	default:
136 		warnx("trouble reading termcap");
137 		/* FALLTHROUGH */
138 	case 0:
139 		/* No such terminal type - assume dumb */
140 		(void)strlcpy(termcap, "dumb:os:col#80:cr=^M:sf=^J:am:",
141 		    sizeof termcap);
142 		break;
143 	}
144 	initcap();
145 	if ((tgetflag("os") && ENTER_BOLD == NULL ) ||
146 	    (tgetflag("ul") && ENTER_UNDERLINE == NULL && UNDER_CHAR == NULL))
147 		must_overstrike = 1;
148 	initbuf();
149 	if (optind == argc)
150 		mfilter(stdin);
151 	else for (; optind<argc; optind++) {
152 		f = fopen(argv[optind],"r");
153 		if (f == NULL)
154 			err(1, "%s", argv[optind]);
155 
156 		mfilter(f);
157 		fclose(f);
158 	}
159 	exit(0);
160 }
161 
162 void
163 mfilter(FILE *f)
164 {
165 	struct CHAR	*cp;
166 	wint_t		 c;
167 	int		 skip_bs, w, wt;
168 
169 	col = 1;
170 	skip_bs = 0;
171 	while (col < MAXBUF) {
172 		switch (c = fgetwc(f)) {
173 		case WEOF:
174 			/* Discard invalid bytes. */
175 			if (ferror(f)) {
176 				if (errno != EILSEQ)
177 					err(1, NULL);
178 				clearerr(f);
179 				break;
180 			}
181 
182 			/* End of file. */
183 			if (maxcol)
184 				flushln();
185 			return;
186 
187 		case L'\b':
188 			/*
189 			 * Back up one character position, not one
190 			 * display column, but ignore a second
191 			 * backspace after a double-width character.
192 			 */
193 			if (skip_bs > 0)
194 				skip_bs--;
195 			else if (col > 1)
196 				if (obuf[--col].c_width > 1)
197 					skip_bs = obuf[col].c_width - 1;
198 			continue;
199 
200 		case L'\t':
201 			/* Calculate the target position. */
202 			wt = (obuf[col - 1].c_pos + 8) & ~7;
203 
204 			/* Advance past known positions. */
205 			while ((w = obuf[col].c_pos) > 0 && w <= wt)
206 				col++;
207 
208 			/* Advance beyond the end. */
209 			if (w == 0) {
210 				w = obuf[col - 1].c_pos;
211 				while (w < wt) {
212 					obuf[col].c_width = 1;
213 					obuf[col++].c_pos = ++w;
214 				}
215 			}
216 			if (col > maxcol)
217 				maxcol = col;
218 			break;
219 
220 		case L'\r':
221 			col = 1;
222 			break;
223 
224 		case SO:
225 			mode |= ALTSET;
226 			break;
227 
228 		case SI:
229 			mode &= ~ALTSET;
230 			break;
231 
232 		case IESC:
233 			switch (c = fgetwc(f)) {
234 			case HREV:
235 				if (halfpos == 0) {
236 					mode |= SUPERSC;
237 					halfpos--;
238 				} else if (halfpos > 0) {
239 					mode &= ~SUBSC;
240 					halfpos--;
241 				} else {
242 					halfpos = 0;
243 					reverse();
244 				}
245 				break;
246 			case HFWD:
247 				if (halfpos == 0) {
248 					mode |= SUBSC;
249 					halfpos++;
250 				} else if (halfpos < 0) {
251 					mode &= ~SUPERSC;
252 					halfpos++;
253 				} else {
254 					halfpos = 0;
255 					fwd();
256 				}
257 				break;
258 			case FREV:
259 				reverse();
260 				break;
261 			default:
262 				errx(1, "0%o: unknown escape sequence", c);
263 			}
264 			break;
265 
266 		case L'_':
267 			if (obuf[col].c_char == L'\0') {
268 				obuf[col].c_char = L'_';
269 				obuf[col].c_width = 1;
270 			} else if (obuf[col].c_char == L'_') {
271 				if (obuf[col - 1].c_mode & UNDERL)
272 					obuf[col].c_mode |= UNDERL | mode;
273 				else if (obuf[col - 1].c_mode & BOLD)
274 					obuf[col].c_mode |= BOLD | mode;
275 				else
276 					obuf[col].c_mode |= INDET | mode;
277 			} else
278 				obuf[col].c_mode |= UNDERL | mode;
279 			/* FALLTHROUGH */
280 
281 		case L' ':
282 			if (obuf[col].c_pos == 0) {
283 				obuf[col].c_width = 1;
284 				obuf[col].c_pos = obuf[col - 1].c_pos + 1;
285 			}
286 			col++;
287 			if (col > maxcol)
288 				maxcol = col;
289 			break;
290 
291 		case L'\n':
292 			flushln();
293 			break;
294 
295 		case L'\f':
296 			flushln();
297 			putwchar(L'\f');
298 			break;
299 
300 		default:
301 			/* Discard valid, but non-printable characters. */
302 			if ((w = wcwidth(c)) == -1)
303 				break;
304 
305 			if (obuf[col].c_char == L'\0') {
306 				obuf[col].c_char = c;
307 				obuf[col].c_mode = mode;
308 				obuf[col].c_width = w;
309 				obuf[col].c_pos = obuf[col - 1].c_pos + w;
310 			} else if (obuf[col].c_char == L'_') {
311 				obuf[col].c_char = c;
312 				obuf[col].c_mode |= UNDERL|mode;
313 				obuf[col].c_width = w;
314 				obuf[col].c_pos = obuf[col - 1].c_pos + w;
315 				for (cp = obuf + col; cp[1].c_pos > 0; cp++)
316 					cp[1].c_pos = cp[0].c_pos +
317 					    cp[1].c_width;
318 			} else if (obuf[col].c_char == c)
319 				obuf[col].c_mode |= BOLD|mode;
320 			else
321 				obuf[col].c_mode = mode;
322 			col++;
323 			if (col > maxcol)
324 				maxcol = col;
325 			break;
326 		}
327 		skip_bs = 0;
328 	}
329 }
330 
331 void
332 flushln(void)
333 {
334 	int lastmode, i;
335 	int hadmodes = 0;
336 
337 	for (i = maxcol; i > 0; i--) {
338 		if (obuf[i].c_mode & INDET) {
339 			obuf[i].c_mode &= ~INDET;
340 			if (i < maxcol && obuf[i + 1].c_mode & BOLD)
341 				obuf[i].c_mode |= BOLD;
342 			else
343 				obuf[i].c_mode |= UNDERL;
344 		}
345 	}
346 
347 	lastmode = NORMAL;
348 	for (i = 1; i < maxcol; i++) {
349 		if (obuf[i].c_mode != lastmode) {
350 			hadmodes = 1;
351 			msetmode(obuf[i].c_mode);
352 			lastmode = obuf[i].c_mode;
353 		}
354 		if (obuf[i].c_char == L'\0') {
355 			if (upln)
356 				PRINT(CURS_RIGHT);
357 			else
358 				outc(L' ', 1);
359 		} else
360 			outc(obuf[i].c_char, obuf[i].c_width);
361 	}
362 	if (lastmode != NORMAL)
363 		msetmode(0);
364 	if (must_overstrike && hadmodes)
365 		overstrike();
366 	putwchar(L'\n');
367 	if (iflag && hadmodes)
368 		iattr();
369 	(void)fflush(stdout);
370 	if (upln)
371 		upln--;
372 	initbuf();
373 }
374 
375 /*
376  * For terminals that can overstrike, overstrike underlines and bolds.
377  * We don't do anything with halfline ups and downs, or Greek.
378  */
379 void
380 overstrike(void)
381 {
382 	wchar_t wc;
383 	int i, j, needspace;
384 
385 	putwchar(L'\r');
386 	needspace = 0;
387 	for (i = 1; i < maxcol; i++) {
388 		if (obuf[i].c_mode != UNDERL && obuf[i].c_mode != BOLD) {
389 			needspace += obuf[i].c_width;
390 			continue;
391 		}
392 		while (needspace > 0) {
393 			putwchar(L' ');
394 			needspace--;
395 		}
396 		if (obuf[i].c_mode == BOLD)
397 			putwchar(obuf[i].c_char);
398 		else
399 			for (j = 0; j < obuf[i].c_width; j++)
400 				putwchar(L'_');
401 	}
402 }
403 
404 void
405 iattr(void)
406 {
407 	int i, j, needspace;
408 	char c;
409 
410 	needspace = 0;
411 	for (i = 1; i < maxcol; i++) {
412 		switch (obuf[i].c_mode) {
413 		case NORMAL:
414 			needspace += obuf[i].c_width;
415 			continue;
416 		case ALTSET:
417 			c = 'g';
418 			break;
419 		case SUPERSC:
420 			c = '^';
421 			break;
422 		case SUBSC:
423 			c = 'v';
424 			break;
425 		case UNDERL:
426 			c = '_';
427 			break;
428 		case BOLD:
429 			c = '!';
430 			break;
431 		default:
432 			c = 'X';
433 			break;
434 		}
435 		while (needspace > 0) {
436 			putwchar(L' ');
437 			needspace--;
438 		}
439 		for (j = 0; j < obuf[i].c_width; j++)
440 			putwchar(c);
441 	}
442 	putwchar(L'\n');
443 }
444 
445 void
446 initbuf(void)
447 {
448 	bzero(obuf, sizeof (obuf));	/* depends on NORMAL == 0 */
449 	col = 1;
450 	maxcol = 0;
451 	mode &= ALTSET;
452 }
453 
454 void
455 fwd(void)
456 {
457 	int oldcol, oldmax;
458 
459 	oldcol = col;
460 	oldmax = maxcol;
461 	flushln();
462 	col = oldcol;
463 	maxcol = oldmax;
464 }
465 
466 void
467 reverse(void)
468 {
469 	upln++;
470 	fwd();
471 	PRINT(CURS_UP);
472 	PRINT(CURS_UP);
473 	upln++;
474 }
475 
476 void
477 initcap(void)
478 {
479 	static char tcapbuf[512];
480 	char *bp = tcapbuf;
481 
482 	/* This nonsense attempts to work with both old and new termcap */
483 	CURS_UP =		tgetstr("up", &bp);
484 	CURS_RIGHT =		tgetstr("ri", &bp);
485 	if (CURS_RIGHT == NULL)
486 		CURS_RIGHT =	tgetstr("nd", &bp);
487 	CURS_LEFT =		tgetstr("le", &bp);
488 	if (CURS_LEFT == NULL)
489 		CURS_LEFT =	tgetstr("bc", &bp);
490 	if (CURS_LEFT == NULL && tgetflag("bs"))
491 		CURS_LEFT =	"\b";
492 
493 	ENTER_STANDOUT =	tgetstr("so", &bp);
494 	EXIT_STANDOUT =		tgetstr("se", &bp);
495 	ENTER_UNDERLINE =	tgetstr("us", &bp);
496 	EXIT_UNDERLINE =	tgetstr("ue", &bp);
497 	ENTER_DIM =		tgetstr("mh", &bp);
498 	ENTER_BOLD =		tgetstr("md", &bp);
499 	ENTER_REVERSE =		tgetstr("mr", &bp);
500 	EXIT_ATTRIBUTES =	tgetstr("me", &bp);
501 
502 	if (!ENTER_BOLD && ENTER_REVERSE)
503 		ENTER_BOLD = ENTER_REVERSE;
504 	if (!ENTER_BOLD && ENTER_STANDOUT)
505 		ENTER_BOLD = ENTER_STANDOUT;
506 	if (!ENTER_UNDERLINE && ENTER_STANDOUT) {
507 		ENTER_UNDERLINE = ENTER_STANDOUT;
508 		EXIT_UNDERLINE = EXIT_STANDOUT;
509 	}
510 	if (!ENTER_DIM && ENTER_STANDOUT)
511 		ENTER_DIM = ENTER_STANDOUT;
512 	if (!ENTER_REVERSE && ENTER_STANDOUT)
513 		ENTER_REVERSE = ENTER_STANDOUT;
514 	if (!EXIT_ATTRIBUTES && EXIT_STANDOUT)
515 		EXIT_ATTRIBUTES = EXIT_STANDOUT;
516 
517 	/*
518 	 * Note that we use REVERSE for the alternate character set,
519 	 * not the as/ae capabilities.  This is because we are modelling
520 	 * the model 37 teletype (since that's what nroff outputs) and
521 	 * the typical as/ae is more of a graphics set, not the greek
522 	 * letters the 37 has.
523 	 */
524 
525 	UNDER_CHAR =		tgetstr("uc", &bp);
526 	must_use_uc = (UNDER_CHAR && !ENTER_UNDERLINE);
527 }
528 
/*
 * Output routine handed to tputs(): write c to stdout as a wide
 * character.  Returns c on success and EOF if putwchar() fails.
 */
int
outchar(int c)
{
	if (putwchar(c) == WEOF)
		return (EOF);
	return (c);
}
534 
535 static int curmode = 0;
536 
537 void
538 outc(wchar_t c, int width)
539 {
540 	int i;
541 
542 	putwchar(c);
543 	if (must_use_uc && (curmode&UNDERL)) {
544 		for (i = 0; i < width; i++)
545 			PRINT(CURS_LEFT);
546 		for (i = 0; i < width; i++)
547 			PRINT(UNDER_CHAR);
548 	}
549 }
550 
551 void
552 msetmode(int newmode)
553 {
554 	if (!iflag) {
555 		if (curmode != NORMAL && newmode != NORMAL)
556 			msetmode(NORMAL);
557 		switch (newmode) {
558 		case NORMAL:
559 			switch(curmode) {
560 			case NORMAL:
561 				break;
562 			case UNDERL:
563 				PRINT(EXIT_UNDERLINE);
564 				break;
565 			default:
566 				/* This includes standout */
567 				PRINT(EXIT_ATTRIBUTES);
568 				break;
569 			}
570 			break;
571 		case ALTSET:
572 			PRINT(ENTER_REVERSE);
573 			break;
574 		case SUPERSC:
575 			/*
576 			 * This only works on a few terminals.
577 			 * It should be fixed.
578 			 */
579 			PRINT(ENTER_UNDERLINE);
580 			PRINT(ENTER_DIM);
581 			break;
582 		case SUBSC:
583 			PRINT(ENTER_DIM);
584 			break;
585 		case UNDERL:
586 			PRINT(ENTER_UNDERLINE);
587 			break;
588 		case BOLD:
589 			PRINT(ENTER_BOLD);
590 			break;
591 		default:
592 			/*
593 			 * We should have some provision here for multiple modes
594 			 * on at once.  This will have to come later.
595 			 */
596 			PRINT(ENTER_STANDOUT);
597 			break;
598 		}
599 	}
600 	curmode = newmode;
601 }
602