xref: /netbsd-src/external/bsd/mdocml/dist/term.c (revision 288bb96063654ec504ca8732afc683d3ebc514b5)
1 /*	$Vendor-Id: term.c,v 1.183 2011/04/04 21:14:12 kristaps Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include <sys/types.h>
23 
24 #include <assert.h>
25 #include <ctype.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #include "mandoc.h"
32 #include "out.h"
33 #include "term.h"
34 #include "main.h"
35 
/*
 * Forward declarations of the file-local helpers defined further down.
 * Every static helper is listed so that definition order is irrelevant.
 */
static	void		  adjbuf(struct termp *p, size_t sz);
static	void		  bufferc(struct termp *p, char c);
static	void		  encode(struct termp *p, const char *word, size_t sz);
static	void		  numbered(struct termp *p, const char *word,
				size_t len);
static	void		  res(struct termp *p, const char *word, size_t len);
static	void		  spec(struct termp *p, enum roffdeco d,
				const char *word, size_t len);
42 
43 
44 void
45 term_free(struct termp *p)
46 {
47 
48 	if (p->buf)
49 		free(p->buf);
50 	if (p->symtab)
51 		chars_free(p->symtab);
52 
53 	free(p);
54 }
55 
56 
57 void
58 term_begin(struct termp *p, term_margin head,
59 		term_margin foot, const void *arg)
60 {
61 
62 	p->headf = head;
63 	p->footf = foot;
64 	p->argf = arg;
65 	(*p->begin)(p);
66 }
67 
68 
69 void
70 term_end(struct termp *p)
71 {
72 
73 	(*p->end)(p);
74 }
75 
76 
77 struct termp *
78 term_alloc(enum termenc enc)
79 {
80 	struct termp	*p;
81 
82 	p = mandoc_calloc(1, sizeof(struct termp));
83 	p->enc = enc;
84 	return(p);
85 }
86 
87 
88 /*
89  * Flush a line of text.  A "line" is loosely defined as being something
90  * that should be followed by a newline, regardless of whether it's
91  * broken apart by newlines getting there.  A line can also be a
92  * fragment of a columnar list (`Bl -tag' or `Bl -column'), which does
93  * not have a trailing newline.
94  *
95  * The following flags may be specified:
96  *
97  *  - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
98  *    offset value.  This is useful when doing columnar lists where the
99  *    prior column has right-padded.
100  *
101  *  - TERMP_NOBREAK: this is the most important and is used when making
102  *    columns.  In short: don't print a newline and instead pad to the
103  *    right margin.  Used in conjunction with TERMP_NOLPAD.
104  *
105  *  - TERMP_TWOSPACE: when padding, make sure there are at least two
106  *    space characters of padding.  Otherwise, rather break the line.
107  *
108  *  - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
109  *    the line is overrun, and don't pad-right if it's underrun.
110  *
 *  - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
 *    overrunning; instead, save the position and continue at that
 *    point on the next invocation.
114  *
115  *  In-line line breaking:
116  *
117  *  If TERMP_NOBREAK is specified and the line overruns the right
118  *  margin, it will break and pad-right to the right margin after
119  *  writing.  If maxrmargin is violated, it will break and continue
120  *  writing from the right-margin, which will lead to the above scenario
121  *  upon exit.  Otherwise, the line will break at the right margin.
122  */
123 void
124 term_flushln(struct termp *p)
125 {
126 	int		 i;     /* current input position in p->buf */
127 	size_t		 vis;   /* current visual position on output */
128 	size_t		 vbl;   /* number of blanks to prepend to output */
129 	size_t		 vend;	/* end of word visual position on output */
130 	size_t		 bp;    /* visual right border position */
131 	size_t		 dv;    /* temporary for visual pos calculations */
132 	int		 j;     /* temporary loop index for p->buf */
133 	int		 jhy;	/* last hyph before overflow w/r/t j */
134 	size_t		 maxvis; /* output position of visible boundary */
135 	size_t		 mmax; /* used in calculating bp */
136 
137 	/*
138 	 * First, establish the maximum columns of "visible" content.
139 	 * This is usually the difference between the right-margin and
140 	 * an indentation, but can be, for tagged lists or columns, a
141 	 * small set of values.
142 	 */
143 	assert  (p->rmargin >= p->offset);
144 	dv     = p->rmargin - p->offset;
145 	maxvis = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0;
146 	dv     = p->maxrmargin - p->offset;
147 	mmax   = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0;
148 
149 	bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
150 
151 	/*
152 	 * Indent the first line of a paragraph.
153 	 */
154 	vbl = p->flags & TERMP_NOLPAD ? (size_t)0 : p->offset;
155 
156 	vis = vend = 0;
157 	i = 0;
158 
159 	while (i < (int)p->col) {
160 		/*
161 		 * Handle literal tab characters: collapse all
162 		 * subsequent tabs into a single huge set of spaces.
163 		 */
164 		while (i < (int)p->col && '\t' == p->buf[i]) {
165 			vend = (vis / p->tabwidth + 1) * p->tabwidth;
166 			vbl += vend - vis;
167 			vis = vend;
168 			i++;
169 		}
170 
171 		/*
172 		 * Count up visible word characters.  Control sequences
173 		 * (starting with the CSI) aren't counted.  A space
174 		 * generates a non-printing word, which is valid (the
175 		 * space is printed according to regular spacing rules).
176 		 */
177 
178 		for (j = i, jhy = 0; j < (int)p->col; j++) {
179 			if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])
180 				break;
181 
182 			/* Back over the the last printed character. */
183 			if (8 == p->buf[j]) {
184 				assert(j);
185 				vend -= (*p->width)(p, p->buf[j - 1]);
186 				continue;
187 			}
188 
189 			/* Regular word. */
190 			/* Break at the hyphen point if we overrun. */
191 			if (vend > vis && vend < bp &&
192 					ASCII_HYPH == p->buf[j])
193 				jhy = j;
194 
195 			vend += (*p->width)(p, p->buf[j]);
196 		}
197 
198 		/*
199 		 * Find out whether we would exceed the right margin.
200 		 * If so, break to the next line.
201 		 */
202 		if (vend > bp && 0 == jhy && vis > 0) {
203 			vend -= vis;
204 			(*p->endline)(p);
205 			if (TERMP_NOBREAK & p->flags) {
206 				p->viscol = p->rmargin;
207 				(*p->advance)(p, p->rmargin);
208 				vend += p->rmargin - p->offset;
209 			} else {
210 				p->viscol = 0;
211 				vbl = p->offset;
212 			}
213 
214 			/* Remove the p->overstep width. */
215 
216 			bp += (size_t)p->overstep;
217 			p->overstep = 0;
218 		}
219 
220 		/* Write out the [remaining] word. */
221 		for ( ; i < (int)p->col; i++) {
222 			if (vend > bp && jhy > 0 && i > jhy)
223 				break;
224 			if ('\t' == p->buf[i])
225 				break;
226 			if (' ' == p->buf[i]) {
227 				j = i;
228 				while (' ' == p->buf[i])
229 					i++;
230 				dv = (size_t)(i - j) * (*p->width)(p, ' ');
231 				vbl += dv;
232 				vend += dv;
233 				break;
234 			}
235 			if (ASCII_NBRSP == p->buf[i]) {
236 				vbl += (*p->width)(p, ' ');
237 				continue;
238 			}
239 
240 			/*
241 			 * Now we definitely know there will be
242 			 * printable characters to output,
243 			 * so write preceding white space now.
244 			 */
245 			if (vbl) {
246 				(*p->advance)(p, vbl);
247 				p->viscol += vbl;
248 				vbl = 0;
249 			}
250 
251 			if (ASCII_HYPH == p->buf[i]) {
252 				(*p->letter)(p, '-');
253 				p->viscol += (*p->width)(p, '-');
254 			} else {
255 				(*p->letter)(p, p->buf[i]);
256 				p->viscol += (*p->width)(p, p->buf[i]);
257 			}
258 		}
259 		vis = vend;
260 	}
261 
262 	/*
263 	 * If there was trailing white space, it was not printed;
264 	 * so reset the cursor position accordingly.
265 	 */
266 	vis -= vbl;
267 
268 	p->col = 0;
269 	p->overstep = 0;
270 
271 	if ( ! (TERMP_NOBREAK & p->flags)) {
272 		p->viscol = 0;
273 		(*p->endline)(p);
274 		return;
275 	}
276 
277 	if (TERMP_HANG & p->flags) {
278 		/* We need one blank after the tag. */
279 		p->overstep = (int)(vis - maxvis + (*p->width)(p, ' '));
280 
281 		/*
282 		 * Behave exactly the same way as groff:
283 		 * If we have overstepped the margin, temporarily move
284 		 * it to the right and flag the rest of the line to be
285 		 * shorter.
286 		 * If we landed right at the margin, be happy.
287 		 * If we are one step before the margin, temporarily
288 		 * move it one step LEFT and flag the rest of the line
289 		 * to be longer.
290 		 */
291 		if (p->overstep >= -1) {
292 			assert((int)maxvis + p->overstep >= 0);
293 			maxvis += (size_t)p->overstep;
294 		} else
295 			p->overstep = 0;
296 
297 	} else if (TERMP_DANGLE & p->flags)
298 		return;
299 
300 	/* Right-pad. */
301 	if (maxvis > vis +
302 	    ((TERMP_TWOSPACE & p->flags) ? (*p->width)(p, ' ') : 0)) {
303 		p->viscol += maxvis - vis;
304 		(*p->advance)(p, maxvis - vis);
305 		vis += (maxvis - vis);
306 	} else {	/* ...or newline break. */
307 		(*p->endline)(p);
308 		p->viscol = p->rmargin;
309 		(*p->advance)(p, p->rmargin);
310 	}
311 }
312 
313 
314 /*
315  * A newline only breaks an existing line; it won't assert vertical
316  * space.  All data in the output buffer is flushed prior to the newline
317  * assertion.
318  */
319 void
320 term_newln(struct termp *p)
321 {
322 
323 	p->flags |= TERMP_NOSPACE;
324 	if (0 == p->col && 0 == p->viscol) {
325 		p->flags &= ~TERMP_NOLPAD;
326 		return;
327 	}
328 	term_flushln(p);
329 	p->flags &= ~TERMP_NOLPAD;
330 }
331 
332 
333 /*
334  * Asserts a vertical space (a full, empty line-break between lines).
335  * Note that if used twice, this will cause two blank spaces and so on.
336  * All data in the output buffer is flushed prior to the newline
337  * assertion.
338  */
339 void
340 term_vspace(struct termp *p)
341 {
342 
343 	term_newln(p);
344 	p->viscol = 0;
345 	(*p->endline)(p);
346 }
347 
348 
/*
 * Look up a numbered character escape via chars_num2char() and, if
 * the lookup yields a result, buffer exactly one character of it.
 */
static void
numbered(struct termp *p, const char *word, size_t len)
{
	const char	*ch;

	if (NULL == (ch = chars_num2char(word, len)))
		return;

	encode(p, ch, 1);
}
358 
359 
360 static void
361 spec(struct termp *p, enum roffdeco d, const char *word, size_t len)
362 {
363 	const char	*rhs;
364 	size_t		 sz;
365 
366 	rhs = chars_spec2str(p->symtab, word, len, &sz);
367 	if (rhs)
368 		encode(p, rhs, sz);
369 	else if (DECO_SSPECIAL == d)
370 		encode(p, word, len);
371 }
372 
373 
374 static void
375 res(struct termp *p, const char *word, size_t len)
376 {
377 	const char	*rhs;
378 	size_t		 sz;
379 
380 	rhs = chars_res2str(p->symtab, word, len, &sz);
381 	if (rhs)
382 		encode(p, rhs, sz);
383 }
384 
385 
386 void
387 term_fontlast(struct termp *p)
388 {
389 	enum termfont	 f;
390 
391 	f = p->fontl;
392 	p->fontl = p->fontq[p->fonti];
393 	p->fontq[p->fonti] = f;
394 }
395 
396 
397 void
398 term_fontrepl(struct termp *p, enum termfont f)
399 {
400 
401 	p->fontl = p->fontq[p->fonti];
402 	p->fontq[p->fonti] = f;
403 }
404 
405 
406 void
407 term_fontpush(struct termp *p, enum termfont f)
408 {
409 
410 	assert(p->fonti + 1 < 10);
411 	p->fontl = p->fontq[p->fonti];
412 	p->fontq[++p->fonti] = f;
413 }
414 
415 
416 const void *
417 term_fontq(struct termp *p)
418 {
419 
420 	return(&p->fontq[p->fonti]);
421 }
422 
423 
424 enum termfont
425 term_fonttop(struct termp *p)
426 {
427 
428 	return(p->fontq[p->fonti]);
429 }
430 
431 
432 void
433 term_fontpopq(struct termp *p, const void *key)
434 {
435 
436 	while (p->fonti >= 0 && key != &p->fontq[p->fonti])
437 		p->fonti--;
438 	assert(p->fonti >= 0);
439 }
440 
441 
442 void
443 term_fontpop(struct termp *p)
444 {
445 
446 	assert(p->fonti);
447 	p->fonti--;
448 }
449 
450 
451 /*
452  * Handle pwords, partial words, which may be either a single word or a
453  * phrase that cannot be broken down (such as a literal string).  This
454  * handles word styling.
455  */
456 void
457 term_word(struct termp *p, const char *word)
458 {
459 	const char	*seq;
460 	size_t		 ssz;
461 	enum roffdeco	 deco;
462 
463 	if ( ! (TERMP_NOSPACE & p->flags)) {
464 		if ( ! (TERMP_KEEP & p->flags)) {
465 			if (TERMP_PREKEEP & p->flags)
466 				p->flags |= TERMP_KEEP;
467 			bufferc(p, ' ');
468 			if (TERMP_SENTENCE & p->flags)
469 				bufferc(p, ' ');
470 		} else
471 			bufferc(p, ASCII_NBRSP);
472 	}
473 
474 	if ( ! (p->flags & TERMP_NONOSPACE))
475 		p->flags &= ~TERMP_NOSPACE;
476 	else
477 		p->flags |= TERMP_NOSPACE;
478 
479 	p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM);
480 
481 	while (*word) {
482 		if ((ssz = strcspn(word, "\\")) > 0)
483 			encode(p, word, ssz);
484 
485 		word += (int)ssz;
486 		if ('\\' != *word)
487 			continue;
488 
489 		seq = ++word;
490 		word += a2roffdeco(&deco, &seq, &ssz);
491 
492 		switch (deco) {
493 		case (DECO_NUMBERED):
494 			numbered(p, seq, ssz);
495 			break;
496 		case (DECO_RESERVED):
497 			res(p, seq, ssz);
498 			break;
499 		case (DECO_SPECIAL):
500 			/* FALLTHROUGH */
501 		case (DECO_SSPECIAL):
502 			spec(p, deco, seq, ssz);
503 			break;
504 		case (DECO_BOLD):
505 			term_fontrepl(p, TERMFONT_BOLD);
506 			break;
507 		case (DECO_ITALIC):
508 			term_fontrepl(p, TERMFONT_UNDER);
509 			break;
510 		case (DECO_ROMAN):
511 			term_fontrepl(p, TERMFONT_NONE);
512 			break;
513 		case (DECO_PREVIOUS):
514 			term_fontlast(p);
515 			break;
516 		default:
517 			break;
518 		}
519 
520 		if (DECO_NOSPACE == deco && '\0' == *word)
521 			p->flags |= TERMP_NOSPACE;
522 	}
523 }
524 
525 
526 static void
527 adjbuf(struct termp *p, size_t sz)
528 {
529 
530 	if (0 == p->maxcols)
531 		p->maxcols = 1024;
532 	while (sz >= p->maxcols)
533 		p->maxcols <<= 2;
534 
535 	p->buf = mandoc_realloc(p->buf, p->maxcols);
536 }
537 
538 
539 static void
540 bufferc(struct termp *p, char c)
541 {
542 
543 	if (p->col + 1 >= p->maxcols)
544 		adjbuf(p, p->col + 1);
545 
546 	p->buf[(int)p->col++] = c;
547 }
548 
549 
550 static void
551 encode(struct termp *p, const char *word, size_t sz)
552 {
553 	enum termfont	  f;
554 	int		  i;
555 
556 	/*
557 	 * Encode and buffer a string of characters.  If the current
558 	 * font mode is unset, buffer directly, else encode then buffer
559 	 * character by character.
560 	 */
561 
562 	if (TERMFONT_NONE == (f = term_fonttop(p))) {
563 		if (p->col + sz >= p->maxcols)
564 			adjbuf(p, p->col + sz);
565 		memcpy(&p->buf[(int)p->col], word, sz);
566 		p->col += sz;
567 		return;
568 	}
569 
570 	/* Pre-buffer, assuming worst-case. */
571 
572 	if (p->col + 1 + (sz * 3) >= p->maxcols)
573 		adjbuf(p, p->col + 1 + (sz * 3));
574 
575 	for (i = 0; i < (int)sz; i++) {
576 		if ( ! isgraph((u_char)word[i])) {
577 			p->buf[(int)p->col++] = word[i];
578 			continue;
579 		}
580 
581 		if (TERMFONT_UNDER == f)
582 			p->buf[(int)p->col++] = '_';
583 		else
584 			p->buf[(int)p->col++] = word[i];
585 
586 		p->buf[(int)p->col++] = 8;
587 		p->buf[(int)p->col++] = word[i];
588 	}
589 }
590 
591 
592 size_t
593 term_len(const struct termp *p, size_t sz)
594 {
595 
596 	return((*p->width)(p, ' ') * sz);
597 }
598 
599 
600 size_t
601 term_strlen(const struct termp *p, const char *cp)
602 {
603 	size_t		 sz, ssz, rsz, i;
604 	enum roffdeco	 d;
605 	const char	*seq, *rhs;
606 
607 	for (sz = 0; '\0' != *cp; )
608 		/*
609 		 * Account for escaped sequences within string length
610 		 * calculations.  This follows the logic in term_word()
611 		 * as we must calculate the width of produced strings.
612 		 */
613 		if ('\\' == *cp) {
614 			seq = ++cp;
615 			cp += a2roffdeco(&d, &seq, &ssz);
616 
617 			switch (d) {
618 			case (DECO_RESERVED):
619 				rhs = chars_res2str
620 					(p->symtab, seq, ssz, &rsz);
621 				break;
622 			case (DECO_SPECIAL):
623 				/* FALLTHROUGH */
624 			case (DECO_SSPECIAL):
625 				rhs = chars_spec2str
626 					(p->symtab, seq, ssz, &rsz);
627 
628 				/* Allow for one-char escapes. */
629 				if (DECO_SSPECIAL != d || rhs)
630 					break;
631 
632 				rhs = seq;
633 				rsz = ssz;
634 				break;
635 			default:
636 				rhs = NULL;
637 				break;
638 			}
639 
640 			if (rhs)
641 				for (i = 0; i < rsz; i++)
642 					sz += (*p->width)(p, *rhs++);
643 		} else if (ASCII_NBRSP == *cp) {
644 			sz += (*p->width)(p, ' ');
645 			cp++;
646 		} else if (ASCII_HYPH == *cp) {
647 			sz += (*p->width)(p, '-');
648 			cp++;
649 		} else
650 			sz += (*p->width)(p, *cp++);
651 
652 	return(sz);
653 }
654 
655 
656 /* ARGSUSED */
657 size_t
658 term_vspan(const struct termp *p, const struct roffsu *su)
659 {
660 	double		 r;
661 
662 	switch (su->unit) {
663 	case (SCALE_CM):
664 		r = su->scale * 2;
665 		break;
666 	case (SCALE_IN):
667 		r = su->scale * 6;
668 		break;
669 	case (SCALE_PC):
670 		r = su->scale;
671 		break;
672 	case (SCALE_PT):
673 		r = su->scale / 8;
674 		break;
675 	case (SCALE_MM):
676 		r = su->scale / 1000;
677 		break;
678 	case (SCALE_VS):
679 		r = su->scale;
680 		break;
681 	default:
682 		r = su->scale - 1;
683 		break;
684 	}
685 
686 	if (r < 0.0)
687 		r = 0.0;
688 	return(/* LINTED */(size_t)
689 			r);
690 }
691 
692 
693 size_t
694 term_hspan(const struct termp *p, const struct roffsu *su)
695 {
696 	double		 v;
697 
698 	v = ((*p->hspan)(p, su));
699 	if (v < 0.0)
700 		v = 0.0;
701 	return((size_t) /* LINTED */
702 			v);
703 }
704