xref: /openbsd-src/usr.bin/mandoc/term.c (revision 43003dfe3ad45d1698bed8a37f2b0f5b14f20d4f)
1 /*	$Id: term.c,v 1.13 2009/09/21 20:57:57 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <assert.h>
18 #include <err.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 
23 #include "term.h"
24 #include "man.h"
25 #include "mdoc.h"
26 
/* Format-specific tree walkers; not defined in this file. */
extern	void		  man_run(struct termp *p,
				const struct man *man);
extern	void		  mdoc_run(struct termp *p,
				const struct mdoc *mdoc);

/* Allocation and release of the terminal state. */
static	struct termp	 *term_alloc(enum termenc enc);
static	void		  term_free(struct termp *p);

/* Escape-sequence handling and line-buffer output. */
static	void		  do_escaped(struct termp *p, const char **word);
static	void		  do_special(struct termp *p,
				const char *word, size_t len);
static	void		  do_reserved(struct termp *p,
				const char *word, size_t len);
static	void		  buffer(struct termp *p, char c);
static	void		  encode(struct termp *p, char c);

/* Single-character delimiter classification. */
static	int		  isopendelim(const char *p);
static	int		  isclosedelim(const char *p);
44 
45 
46 void *
47 ascii_alloc(void)
48 {
49 
50 	return(term_alloc(TERMENC_ASCII));
51 }
52 
53 
54 void
55 terminal_man(void *arg, const struct man *man)
56 {
57 	struct termp	*p;
58 
59 	p = (struct termp *)arg;
60 	if (NULL == p->symtab)
61 		p->symtab = term_ascii2htab();
62 
63 	man_run(p, man);
64 }
65 
66 
67 void
68 terminal_mdoc(void *arg, const struct mdoc *mdoc)
69 {
70 	struct termp	*p;
71 
72 	p = (struct termp *)arg;
73 	if (NULL == p->symtab)
74 		p->symtab = term_ascii2htab();
75 
76 	mdoc_run(p, mdoc);
77 }
78 
79 
/*
 * Release the terminal state passed as the opaque pointer "arg".
 */
void
terminal_free(void *arg)
{
	struct termp	*tp = arg;

	term_free(tp);
}
86 
87 
88 static void
89 term_free(struct termp *p)
90 {
91 
92 	if (p->buf)
93 		free(p->buf);
94 	if (TERMENC_ASCII == p->enc && p->symtab)
95 		term_asciifree(p->symtab);
96 
97 	free(p);
98 }
99 
100 
101 static struct termp *
102 term_alloc(enum termenc enc)
103 {
104 	struct termp *p;
105 
106 	if (NULL == (p = malloc(sizeof(struct termp))))
107 		err(1, "malloc");
108 	bzero(p, sizeof(struct termp));
109 	p->maxrmargin = 78;
110 	p->enc = enc;
111 	return(p);
112 }
113 
114 
/*
 * Return 1 if "p" is exactly one character long and that character is
 * one of . , ; : ? ! ) ] } -- otherwise return 0.
 */
static int
isclosedelim(const char *p)
{

	/* Reject the empty string and anything longer than one byte. */
	if ('\0' == p[0] || '\0' != p[1])
		return(0);

	return(NULL != strchr(".,;:?!)]}", p[0]));
}
147 
148 
/*
 * Return 1 if "p" is exactly one character long and that character is
 * one of ( [ { -- otherwise return 0.
 */
static int
isopendelim(const char *p)
{

	/* Reject the empty string and anything longer than one byte. */
	if ('\0' == p[0] || '\0' != p[1])
		return(0);

	return(NULL != strchr("([{", p[0]));
}
169 
170 
171 /*
172  * Flush a line of text.  A "line" is loosely defined as being something
173  * that should be followed by a newline, regardless of whether it's
174  * broken apart by newlines getting there.  A line can also be a
175  * fragment of a columnar list.
176  *
177  * Specifically, a line is whatever's in p->buf of length p->col, which
178  * is zeroed after this function returns.
179  *
180  * The usage of termp:flags is as follows:
181  *
182  *  - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
183  *    offset value.  This is useful when doing columnar lists where the
184  *    prior column has right-padded.
185  *
186  *  - TERMP_NOBREAK: this is the most important and is used when making
187  *    columns.  In short: don't print a newline and instead pad to the
188  *    right margin.  Used in conjunction with TERMP_NOLPAD.
189  *
190  *  - TERMP_TWOSPACE: when padding, make sure there are at least two
191  *    space characters of padding.  Otherwise, rather break the line.
192  *
193  *  - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
194  *    the line is overrun, and don't pad-right if it's underrun.
195  *
196  *  - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
197  *    overruning, instead save the position and continue at that point
198  *    when the next invocation.
199  *
200  *  In-line line breaking:
201  *
202  *  If TERMP_NOBREAK is specified and the line overruns the right
203  *  margin, it will break and pad-right to the right margin after
204  *  writing.  If maxrmargin is violated, it will break and continue
205  *  writing from the right-margin, which will lead to the above
206  *  scenario upon exit.
207  *
208  *  Otherwise, the line will break at the right margin.  Extremely long
209  *  lines will cause the system to emit a warning (TODO: hyphenate, if
210  *  possible).
211  */
212 void
213 term_flushln(struct termp *p)
214 {
215 	int		 i, j;
216 	size_t		 vbl, vsz, vis, maxvis, mmax, bp;
217 	static int	 overstep = 0;
218 
219 	/*
220 	 * First, establish the maximum columns of "visible" content.
221 	 * This is usually the difference between the right-margin and
222 	 * an indentation, but can be, for tagged lists or columns, a
223 	 * small set of values.
224 	 */
225 
226 	assert(p->offset < p->rmargin);
227 	assert((int)(p->rmargin - p->offset) - overstep > 0);
228 
229 	maxvis = /* LINTED */
230 		p->rmargin - p->offset - overstep;
231 	mmax = /* LINTED */
232 		p->maxrmargin - p->offset - overstep;
233 
234 	bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
235 	vis = 0;
236 	overstep = 0;
237 
238 	/*
239 	 * If in the standard case (left-justified), then begin with our
240 	 * indentation, otherwise (columns, etc.) just start spitting
241 	 * out text.
242 	 */
243 
244 	if ( ! (p->flags & TERMP_NOLPAD))
245 		/* LINTED */
246 		for (j = 0; j < (int)p->offset; j++)
247 			putchar(' ');
248 
249 	for (i = 0; i < (int)p->col; i++) {
250 		/*
251 		 * Count up visible word characters.  Control sequences
252 		 * (starting with the CSI) aren't counted.  A space
253 		 * generates a non-printing word, which is valid (the
254 		 * space is printed according to regular spacing rules).
255 		 */
256 
257 		/* LINTED */
258 		for (j = i, vsz = 0; j < (int)p->col; j++) {
259 			if (j && ' ' == p->buf[j])
260 				break;
261 			else if (8 == p->buf[j])
262 				vsz--;
263 			else
264 				vsz++;
265 		}
266 
267 		/*
268 		 * Choose the number of blanks to prepend: no blank at the
269 		 * beginning of a line, one between words -- but do not
270 		 * actually write them yet.
271 		 */
272 		vbl = (size_t)(0 == vis ? 0 : 1);
273 
274 		/*
275 		 * Find out whether we would exceed the right margin.
276 		 * If so, break to the next line.  (TODO: hyphenate)
277 		 * Otherwise, write the chosen number of blanks now.
278 		 */
279 		if (vis && vis + vbl + vsz > bp) {
280 			putchar('\n');
281 			if (TERMP_NOBREAK & p->flags) {
282 				for (j = 0; j < (int)p->rmargin; j++)
283 					putchar(' ');
284 				vis = p->rmargin - p->offset;
285 			} else {
286 				for (j = 0; j < (int)p->offset; j++)
287 					putchar(' ');
288 				vis = 0;
289 			}
290 		} else {
291 			for (j = 0; j < (int)vbl; j++)
292 				putchar(' ');
293 			vis += vbl;
294 		}
295 
296 		/*
297 		 * Finally, write out the word.
298 		 */
299 		for ( ; i < (int)p->col; i++) {
300 			if (' ' == p->buf[i])
301 				break;
302 			putchar(p->buf[i]);
303 		}
304 		vis += vsz;
305 	}
306 	p->col = 0;
307 
308 	if ( ! (TERMP_NOBREAK & p->flags)) {
309 		putchar('\n');
310 		return;
311 	}
312 
313 	if (TERMP_HANG & p->flags) {
314 		/* We need one blank after the tag. */
315 		overstep = /* LINTED */
316 			vis - maxvis + 1;
317 
318 		/*
319 		 * Behave exactly the same way as groff:
320 		 * If we have overstepped the margin, temporarily move
321 		 * it to the right and flag the rest of the line to be
322 		 * shorter.
323 		 * If we landed right at the margin, be happy.
324 		 * If we are one step before the margin, temporarily
325 		 * move it one step LEFT and flag the rest of the line
326 		 * to be longer.
327 		 */
328 		if (overstep >= -1) {
329 			assert((int)maxvis + overstep >= 0);
330 			/* LINTED */
331 			maxvis += overstep;
332 		} else
333 			overstep = 0;
334 
335 	} else if (TERMP_DANGLE & p->flags)
336 		return;
337 
338 	/* Right-pad. */
339 	if (maxvis > vis + /* LINTED */
340 			((TERMP_TWOSPACE & p->flags) ? 1 : 0))
341 		for ( ; vis < maxvis; vis++)
342 			putchar(' ');
343 	else {	/* ...or newline break. */
344 		putchar('\n');
345 		for (i = 0; i < (int)p->rmargin; i++)
346 			putchar(' ');
347 	}
348 }
349 
350 
351 /*
352  * A newline only breaks an existing line; it won't assert vertical
353  * space.  All data in the output buffer is flushed prior to the newline
354  * assertion.
355  */
356 void
357 term_newln(struct termp *p)
358 {
359 
360 	p->flags |= TERMP_NOSPACE;
361 	if (0 == p->col) {
362 		p->flags &= ~TERMP_NOLPAD;
363 		return;
364 	}
365 	term_flushln(p);
366 	p->flags &= ~TERMP_NOLPAD;
367 }
368 
369 
/*
 * Assert vertical space: break the current line as term_newln() does,
 * then write one additional newline.  Calling this twice in a row
 * therefore yields two blank lines, and so on.
 */
void
term_vspace(struct termp *p)
{

	/* Finish whatever is pending first... */
	term_newln(p);

	/* ...then add the empty line. */
	putchar('\n');
}
383 
384 
385 static void
386 do_special(struct termp *p, const char *word, size_t len)
387 {
388 	const char	*rhs;
389 	size_t		 sz;
390 	int		 i;
391 
392 	rhs = term_a2ascii(p->symtab, word, len, &sz);
393 
394 	if (NULL == rhs) {
395 #if 0
396 		fputs("Unknown special character: ", stderr);
397 		for (i = 0; i < (int)len; i++)
398 			fputc(word[i], stderr);
399 		fputc('\n', stderr);
400 #endif
401 		return;
402 	}
403 	for (i = 0; i < (int)sz; i++)
404 		encode(p, rhs[i]);
405 }
406 
407 
408 static void
409 do_reserved(struct termp *p, const char *word, size_t len)
410 {
411 	const char	*rhs;
412 	size_t		 sz;
413 	int		 i;
414 
415 	rhs = term_a2res(p->symtab, word, len, &sz);
416 
417 	if (NULL == rhs) {
418 #if 0
419 		fputs("Unknown reserved word: ", stderr);
420 		for (i = 0; i < (int)len; i++)
421 			fputc(word[i], stderr);
422 		fputc('\n', stderr);
423 #endif
424 		return;
425 	}
426 	for (i = 0; i < (int)sz; i++)
427 		encode(p, rhs[i]);
428 }
429 
430 
431 /*
432  * Handle an escape sequence: determine its length and pass it to the
433  * escape-symbol look table.  Note that we assume mdoc(3) has validated
434  * the escape sequence (we assert upon badly-formed escape sequences).
435  */
436 static void
437 do_escaped(struct termp *p, const char **word)
438 {
439 	int		 j, type;
440 	const char	*wp;
441 
442 	wp = *word;
443 	type = 1;
444 
445 	if (0 == *(++wp)) {
446 		*word = wp;
447 		return;
448 	}
449 
450 	if ('(' == *wp) {
451 		wp++;
452 		if (0 == *wp || 0 == *(wp + 1)) {
453 			*word = 0 == *wp ? wp : wp + 1;
454 			return;
455 		}
456 
457 		do_special(p, wp, 2);
458 		*word = ++wp;
459 		return;
460 
461 	} else if ('*' == *wp) {
462 		if (0 == *(++wp)) {
463 			*word = wp;
464 			return;
465 		}
466 
467 		switch (*wp) {
468 		case ('('):
469 			wp++;
470 			if (0 == *wp || 0 == *(wp + 1)) {
471 				*word = 0 == *wp ? wp : wp + 1;
472 				return;
473 			}
474 
475 			do_reserved(p, wp, 2);
476 			*word = ++wp;
477 			return;
478 		case ('['):
479 			type = 0;
480 			break;
481 		default:
482 			do_reserved(p, wp, 1);
483 			*word = wp;
484 			return;
485 		}
486 
487 	} else if ('f' == *wp) {
488 		if (0 == *(++wp)) {
489 			*word = wp;
490 			return;
491 		}
492 
493 		switch (*wp) {
494 		case ('B'):
495 			p->bold++;
496 			break;
497 		case ('I'):
498 			p->under++;
499 			break;
500 		case ('P'):
501 			/* FALLTHROUGH */
502 		case ('R'):
503 			p->bold = p->under = 0;
504 			break;
505 		default:
506 			break;
507 		}
508 
509 		*word = wp;
510 		return;
511 
512 	} else if ('[' != *wp) {
513 		do_special(p, wp, 1);
514 		*word = wp;
515 		return;
516 	}
517 
518 	wp++;
519 	for (j = 0; *wp && ']' != *wp; wp++, j++)
520 		/* Loop... */ ;
521 
522 	if (0 == *wp) {
523 		*word = wp;
524 		return;
525 	}
526 
527 	if (type)
528 		do_special(p, wp - j, (size_t)j);
529 	else
530 		do_reserved(p, wp - j, (size_t)j);
531 	*word = wp;
532 }
533 
534 
535 /*
536  * Handle pwords, partial words, which may be either a single word or a
537  * phrase that cannot be broken down (such as a literal string).  This
538  * handles word styling.
539  */
540 void
541 term_word(struct termp *p, const char *word)
542 {
543 	const char	 *sv;
544 
545 	if (isclosedelim(word))
546 		if ( ! (TERMP_IGNDELIM & p->flags))
547 			p->flags |= TERMP_NOSPACE;
548 
549 	if ( ! (TERMP_NOSPACE & p->flags))
550 		buffer(p, ' ');
551 
552 	if ( ! (p->flags & TERMP_NONOSPACE))
553 		p->flags &= ~TERMP_NOSPACE;
554 
555 	for (sv = word; *word; word++)
556 		if ('\\' != *word)
557 			encode(p, *word);
558 		else
559 			do_escaped(p, &word);
560 
561 	if (isopendelim(sv))
562 		p->flags |= TERMP_NOSPACE;
563 }
564 
565 
566 /*
567  * Insert a single character into the line-buffer.  If the buffer's
568  * space is exceeded, then allocate more space by doubling the buffer
569  * size.
570  */
571 static void
572 buffer(struct termp *p, char c)
573 {
574 	size_t		 s;
575 
576 	if (p->col + 1 >= p->maxcols) {
577 		if (0 == p->maxcols)
578 			p->maxcols = 256;
579 		s = p->maxcols * 2;
580 		p->buf = realloc(p->buf, s);
581 		if (NULL == p->buf)
582 			err(1, "realloc");
583 		p->maxcols = s;
584 	}
585 	p->buf[(int)(p->col)++] = c;
586 }
587 
588 
589 static void
590 encode(struct termp *p, char c)
591 {
592 
593 	if (' ' != c) {
594 		if (p->bold) {
595 			buffer(p, c);
596 			buffer(p, 8);
597 		}
598 		if (p->under) {
599 			buffer(p, '_');
600 			buffer(p, 8);
601 		}
602 	}
603 	buffer(p, c);
604 }
605