xref: /openbsd-src/usr.bin/mandoc/term.c (revision 7bbe964f6b7d22ad07ca46292495604f942eba4e)
1 /*	$Id: term.c,v 1.18 2009/10/27 21:40:07 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <assert.h>
18 #include <err.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 
23 #include "chars.h"
24 #include "out.h"
25 #include "term.h"
26 #include "man.h"
27 #include "mdoc.h"
28 #include "main.h"
29 
/* FIXME: accommodate non-breaking, non-collapsing white-space. */
/* FIXME: accommodate non-breaking, collapsing white-space. */
32 
/* Allocation and release of the terminal state. */
static	struct termp	 *term_alloc(enum termenc enc);
static	void		  term_free(struct termp *p);

/* Escape-sequence decoding. */
static	void		  do_escaped(struct termp *p, const char **word);
static	void		  do_special(struct termp *p,
				const char *word, size_t len);
static	void		  do_reserved(struct termp *p,
				const char *word, size_t len);

/* Low-level output into the line buffer. */
static	void		  buffer(struct termp *p, char c);
static	void		  encode(struct termp *p, char c);
43 
44 
45 void *
46 ascii_alloc(void)
47 {
48 
49 	return(term_alloc(TERMENC_ASCII));
50 }
51 
52 
/*
 * Release a terminal state given as an opaque pointer; the argument is
 * treated as a struct termp and passed on to term_free().
 */
void
terminal_free(void *arg)
{
	struct termp	*p = arg;

	term_free(p);
}
59 
60 
61 static void
62 term_free(struct termp *p)
63 {
64 
65 	if (p->buf)
66 		free(p->buf);
67 	if (p->symtab)
68 		chars_free(p->symtab);
69 
70 	free(p);
71 }
72 
73 
74 static struct termp *
75 term_alloc(enum termenc enc)
76 {
77 	struct termp *p;
78 
79 	if (NULL == (p = malloc(sizeof(struct termp))))
80 		return(NULL);
81 	bzero(p, sizeof(struct termp));
82 	p->maxrmargin = 78;
83 	p->enc = enc;
84 	return(p);
85 }
86 
87 
88 /*
89  * Flush a line of text.  A "line" is loosely defined as being something
90  * that should be followed by a newline, regardless of whether it's
91  * broken apart by newlines getting there.  A line can also be a
92  * fragment of a columnar list.
93  *
94  * Specifically, a line is whatever's in p->buf of length p->col, which
95  * is zeroed after this function returns.
96  *
97  * The usage of termp:flags is as follows:
98  *
99  *  - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
100  *    offset value.  This is useful when doing columnar lists where the
101  *    prior column has right-padded.
102  *
103  *  - TERMP_NOBREAK: this is the most important and is used when making
104  *    columns.  In short: don't print a newline and instead pad to the
105  *    right margin.  Used in conjunction with TERMP_NOLPAD.
106  *
107  *  - TERMP_TWOSPACE: when padding, make sure there are at least two
108  *    space characters of padding.  Otherwise, rather break the line.
109  *
110  *  - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
111  *    the line is overrun, and don't pad-right if it's underrun.
112  *
113  *  - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
114  *    overruning, instead save the position and continue at that point
115  *    when the next invocation.
116  *
117  *  In-line line breaking:
118  *
119  *  If TERMP_NOBREAK is specified and the line overruns the right
120  *  margin, it will break and pad-right to the right margin after
121  *  writing.  If maxrmargin is violated, it will break and continue
122  *  writing from the right-margin, which will lead to the above
123  *  scenario upon exit.
124  *
125  *  Otherwise, the line will break at the right margin.  Extremely long
126  *  lines will cause the system to emit a warning (TODO: hyphenate, if
127  *  possible).
128  */
129 void
130 term_flushln(struct termp *p)
131 {
132 	int		 i, j;
133 	size_t		 vbl, vsz, vis, maxvis, mmax, bp;
134 	static int	 overstep = 0;
135 
136 	/*
137 	 * First, establish the maximum columns of "visible" content.
138 	 * This is usually the difference between the right-margin and
139 	 * an indentation, but can be, for tagged lists or columns, a
140 	 * small set of values.
141 	 */
142 
143 	assert(p->offset < p->rmargin);
144 	assert((int)(p->rmargin - p->offset) - overstep > 0);
145 
146 	maxvis = /* LINTED */
147 		p->rmargin - p->offset - overstep;
148 	mmax = /* LINTED */
149 		p->maxrmargin - p->offset - overstep;
150 
151 	bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
152 	vis = 0;
153 
154 	/*
155 	 * If in the standard case (left-justified), then begin with our
156 	 * indentation, otherwise (columns, etc.) just start spitting
157 	 * out text.
158 	 */
159 
160 	if ( ! (p->flags & TERMP_NOLPAD))
161 		/* LINTED */
162 		for (j = 0; j < (int)p->offset; j++)
163 			putchar(' ');
164 
165 	for (i = 0; i < (int)p->col; i++) {
166 		/*
167 		 * Count up visible word characters.  Control sequences
168 		 * (starting with the CSI) aren't counted.  A space
169 		 * generates a non-printing word, which is valid (the
170 		 * space is printed according to regular spacing rules).
171 		 */
172 
173 		/* LINTED */
174 		for (j = i, vsz = 0; j < (int)p->col; j++) {
175 			if (j && ' ' == p->buf[j])
176 				break;
177 			else if (8 == p->buf[j])
178 				vsz--;
179 			else
180 				vsz++;
181 		}
182 
183 		/*
184 		 * Choose the number of blanks to prepend: no blank at the
185 		 * beginning of a line, one between words -- but do not
186 		 * actually write them yet.
187 		 */
188 		vbl = (size_t)(0 == vis ? 0 : 1);
189 
190 		/*
191 		 * Find out whether we would exceed the right margin.
192 		 * If so, break to the next line.  (TODO: hyphenate)
193 		 * Otherwise, write the chosen number of blanks now.
194 		 */
195 		if (vis && vis + vbl + vsz > bp) {
196 			putchar('\n');
197 			if (TERMP_NOBREAK & p->flags) {
198 				for (j = 0; j < (int)p->rmargin; j++)
199 					putchar(' ');
200 				vis = p->rmargin - p->offset;
201 			} else {
202 				for (j = 0; j < (int)p->offset; j++)
203 					putchar(' ');
204 				vis = 0;
205 			}
206 			/* Remove the overstep width. */
207 			bp += (int)/* LINTED */
208 				overstep;
209 			overstep = 0;
210 		} else {
211 			for (j = 0; j < (int)vbl; j++)
212 				putchar(' ');
213 			vis += vbl;
214 		}
215 
216 		/*
217 		 * Finally, write out the word.
218 		 */
219 		for ( ; i < (int)p->col; i++) {
220 			if (' ' == p->buf[i])
221 				break;
222 			putchar(p->buf[i]);
223 		}
224 		vis += vsz;
225 	}
226 
227 	p->col = 0;
228 	overstep = 0;
229 
230 	if ( ! (TERMP_NOBREAK & p->flags)) {
231 		putchar('\n');
232 		return;
233 	}
234 
235 	if (TERMP_HANG & p->flags) {
236 		/* We need one blank after the tag. */
237 		overstep = /* LINTED */
238 			vis - maxvis + 1;
239 
240 		/*
241 		 * Behave exactly the same way as groff:
242 		 * If we have overstepped the margin, temporarily move
243 		 * it to the right and flag the rest of the line to be
244 		 * shorter.
245 		 * If we landed right at the margin, be happy.
246 		 * If we are one step before the margin, temporarily
247 		 * move it one step LEFT and flag the rest of the line
248 		 * to be longer.
249 		 */
250 		if (overstep >= -1) {
251 			assert((int)maxvis + overstep >= 0);
252 			/* LINTED */
253 			maxvis += overstep;
254 		} else
255 			overstep = 0;
256 
257 	} else if (TERMP_DANGLE & p->flags)
258 		return;
259 
260 	/* Right-pad. */
261 	if (maxvis > vis + /* LINTED */
262 			((TERMP_TWOSPACE & p->flags) ? 1 : 0))
263 		for ( ; vis < maxvis; vis++)
264 			putchar(' ');
265 	else {	/* ...or newline break. */
266 		putchar('\n');
267 		for (i = 0; i < (int)p->rmargin; i++)
268 			putchar(' ');
269 	}
270 }
271 
272 
273 /*
274  * A newline only breaks an existing line; it won't assert vertical
275  * space.  All data in the output buffer is flushed prior to the newline
276  * assertion.
277  */
278 void
279 term_newln(struct termp *p)
280 {
281 
282 	p->flags |= TERMP_NOSPACE;
283 	if (0 == p->col) {
284 		p->flags &= ~TERMP_NOLPAD;
285 		return;
286 	}
287 	term_flushln(p);
288 	p->flags &= ~TERMP_NOLPAD;
289 }
290 
291 
/*
 * Emit one blank line: finish whatever is pending via term_newln(),
 * then write a bare newline.  Each call adds another blank line, so
 * calling it twice yields two.
 */
void
term_vspace(struct termp *p)
{
	term_newln(p);
	putchar('\n');
}
305 
306 
307 static void
308 do_special(struct termp *p, const char *word, size_t len)
309 {
310 	const char	*rhs;
311 	size_t		 sz;
312 	int		 i;
313 
314 	rhs = chars_a2ascii(p->symtab, word, len, &sz);
315 
316 	if (NULL == rhs) {
317 #if 0
318 		fputs("Unknown special character: ", stderr);
319 		for (i = 0; i < (int)len; i++)
320 			fputc(word[i], stderr);
321 		fputc('\n', stderr);
322 #endif
323 		return;
324 	}
325 	for (i = 0; i < (int)sz; i++)
326 		encode(p, rhs[i]);
327 }
328 
329 
330 static void
331 do_reserved(struct termp *p, const char *word, size_t len)
332 {
333 	const char	*rhs;
334 	size_t		 sz;
335 	int		 i;
336 
337 	rhs = chars_a2res(p->symtab, word, len, &sz);
338 
339 	if (NULL == rhs) {
340 #if 0
341 		fputs("Unknown reserved word: ", stderr);
342 		for (i = 0; i < (int)len; i++)
343 			fputc(word[i], stderr);
344 		fputc('\n', stderr);
345 #endif
346 		return;
347 	}
348 	for (i = 0; i < (int)sz; i++)
349 		encode(p, rhs[i]);
350 }
351 
352 
353 /*
354  * Handle an escape sequence: determine its length and pass it to the
355  * escape-symbol look table.  Note that we assume mdoc(3) has validated
356  * the escape sequence (we assert upon badly-formed escape sequences).
357  */
358 static void
359 do_escaped(struct termp *p, const char **word)
360 {
361 	int		 j, type;
362 	const char	*wp;
363 
364 	wp = *word;
365 	type = 1;
366 
367 	if (0 == *(++wp)) {
368 		*word = wp;
369 		return;
370 	}
371 
372 	if ('(' == *wp) {
373 		wp++;
374 		if (0 == *wp || 0 == *(wp + 1)) {
375 			*word = 0 == *wp ? wp : wp + 1;
376 			return;
377 		}
378 
379 		do_special(p, wp, 2);
380 		*word = ++wp;
381 		return;
382 
383 	} else if ('*' == *wp) {
384 		if (0 == *(++wp)) {
385 			*word = wp;
386 			return;
387 		}
388 
389 		switch (*wp) {
390 		case ('('):
391 			wp++;
392 			if (0 == *wp || 0 == *(wp + 1)) {
393 				*word = 0 == *wp ? wp : wp + 1;
394 				return;
395 			}
396 
397 			do_reserved(p, wp, 2);
398 			*word = ++wp;
399 			return;
400 		case ('['):
401 			type = 0;
402 			break;
403 		default:
404 			do_reserved(p, wp, 1);
405 			*word = wp;
406 			return;
407 		}
408 
409 	} else if ('f' == *wp) {
410 		if (0 == *(++wp)) {
411 			*word = wp;
412 			return;
413 		}
414 
415 		switch (*wp) {
416 		case ('B'):
417 			p->bold++;
418 			break;
419 		case ('I'):
420 			p->under++;
421 			break;
422 		case ('P'):
423 			/* FALLTHROUGH */
424 		case ('R'):
425 			p->bold = p->under = 0;
426 			break;
427 		default:
428 			break;
429 		}
430 
431 		*word = wp;
432 		return;
433 
434 	} else if ('[' != *wp) {
435 		do_special(p, wp, 1);
436 		*word = wp;
437 		return;
438 	}
439 
440 	wp++;
441 	for (j = 0; *wp && ']' != *wp; wp++, j++)
442 		/* Loop... */ ;
443 
444 	if (0 == *wp) {
445 		*word = wp;
446 		return;
447 	}
448 
449 	if (type)
450 		do_special(p, wp - j, (size_t)j);
451 	else
452 		do_reserved(p, wp - j, (size_t)j);
453 	*word = wp;
454 }
455 
456 
457 /*
458  * Handle pwords, partial words, which may be either a single word or a
459  * phrase that cannot be broken down (such as a literal string).  This
460  * handles word styling.
461  */
462 void
463 term_word(struct termp *p, const char *word)
464 {
465 	const char	 *sv;
466 
467 	sv = word;
468 
469 	if (word[0] && 0 == word[1])
470 		switch (word[0]) {
471 		case('.'):
472 			/* FALLTHROUGH */
473 		case(','):
474 			/* FALLTHROUGH */
475 		case(';'):
476 			/* FALLTHROUGH */
477 		case(':'):
478 			/* FALLTHROUGH */
479 		case('?'):
480 			/* FALLTHROUGH */
481 		case('!'):
482 			/* FALLTHROUGH */
483 		case(')'):
484 			/* FALLTHROUGH */
485 		case(']'):
486 			/* FALLTHROUGH */
487 		case('}'):
488 			if ( ! (TERMP_IGNDELIM & p->flags))
489 				p->flags |= TERMP_NOSPACE;
490 			break;
491 		default:
492 			break;
493 		}
494 
495 	if ( ! (TERMP_NOSPACE & p->flags))
496 		buffer(p, ' ');
497 
498 	if ( ! (p->flags & TERMP_NONOSPACE))
499 		p->flags &= ~TERMP_NOSPACE;
500 
501 	for ( ; *word; word++)
502 		if ('\\' != *word)
503 			encode(p, *word);
504 		else
505 			do_escaped(p, &word);
506 
507 	if (sv[0] && 0 == sv[1])
508 		switch (sv[0]) {
509 		case('('):
510 			/* FALLTHROUGH */
511 		case('['):
512 			/* FALLTHROUGH */
513 		case('{'):
514 			p->flags |= TERMP_NOSPACE;
515 			break;
516 		default:
517 			break;
518 		}
519 }
520 
521 
522 /*
523  * Insert a single character into the line-buffer.  If the buffer's
524  * space is exceeded, then allocate more space by doubling the buffer
525  * size.
526  */
527 static void
528 buffer(struct termp *p, char c)
529 {
530 	size_t		 s;
531 
532 	if (p->col + 1 >= p->maxcols) {
533 		if (0 == p->maxcols)
534 			p->maxcols = 256;
535 		s = p->maxcols * 2;
536 		p->buf = realloc(p->buf, s);
537 		if (NULL == p->buf)
538 			err(1, "realloc"); /* FIXME: shouldn't be here! */
539 		p->maxcols = s;
540 	}
541 	p->buf[(int)(p->col)++] = c;
542 }
543 
544 
545 static void
546 encode(struct termp *p, char c)
547 {
548 
549 	if (' ' != c) {
550 		if (p->under) {
551 			buffer(p, '_');
552 			buffer(p, 8);
553 		}
554 		if (p->bold) {
555 			buffer(p, c);
556 			buffer(p, 8);
557 		}
558 	}
559 	buffer(p, c);
560 }
561 
562 
563 size_t
564 term_vspan(const struct roffsu *su)
565 {
566 	double		 r;
567 
568 	switch (su->unit) {
569 	case (SCALE_CM):
570 		r = su->scale * 2;
571 		break;
572 	case (SCALE_IN):
573 		r = su->scale * 6;
574 		break;
575 	case (SCALE_PC):
576 		r = su->scale;
577 		break;
578 	case (SCALE_PT):
579 		r = su->scale / 8;
580 		break;
581 	case (SCALE_MM):
582 		r = su->scale / 1000;
583 		break;
584 	case (SCALE_VS):
585 		r = su->scale;
586 		break;
587 	default:
588 		r = su->scale - 1;
589 		break;
590 	}
591 
592 	if (r < 0.0)
593 		r = 0.0;
594 	return(/* LINTED */(size_t)
595 			r);
596 }
597 
598 
599 size_t
600 term_hspan(const struct roffsu *su)
601 {
602 	double		 r;
603 
604 	/* XXX: CM, IN, and PT are approximations. */
605 
606 	switch (su->unit) {
607 	case (SCALE_CM):
608 		r = 4 * su->scale;
609 		break;
610 	case (SCALE_IN):
611 		/* XXX: this is an approximation. */
612 		r = 10 * su->scale;
613 		break;
614 	case (SCALE_PC):
615 		r = (10 * su->scale) / 6;
616 		break;
617 	case (SCALE_PT):
618 		r = (10 * su->scale) / 72;
619 		break;
620 	case (SCALE_MM):
621 		r = su->scale / 1000; /* FIXME: double-check. */
622 		break;
623 	case (SCALE_VS):
624 		r = su->scale * 2 - 1; /* FIXME: double-check. */
625 		break;
626 	default:
627 		r = su->scale;
628 		break;
629 	}
630 
631 	if (r < 0.0)
632 		r = 0.0;
633 	return((size_t)/* LINTED */
634 			r);
635 }
636 
637 
638