xref: /openbsd-src/usr.bin/mandoc/term.c (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1 /* $Id: term.c,v 1.1 2009/04/06 20:30:40 kristaps Exp $ */
2 /*
3  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the
7  * above copyright notice and this permission notice appear in all
8  * copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11  * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12  * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13  * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14  * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15  * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16  * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17  * PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include <assert.h>
20 #include <err.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 
25 #include "term.h"
26 #include "man.h"
27 #include "mdoc.h"
28 
/*
 * Back-end drivers, defined outside this file.  Each is handed the
 * terminal state and a parsed document; terminal_man() and
 * terminal_mdoc() below return their result unchanged.
 */
extern	int		  man_run(struct termp *,
				const struct man *);
extern	int		  mdoc_run(struct termp *,
				const struct mdoc *);

/* File-local helpers: allocation, word/escape handling, buffering. */
static	struct termp	 *term_alloc(enum termenc);
static	void		  term_free(struct termp *);
static	void		  term_pword(struct termp *, const char *, int);
static	void		  term_pescape(struct termp *,
				const char *, int *, int);
static	void		  term_nescape(struct termp *,
				const char *, size_t);
static	void		  term_chara(struct termp *, char);
static	void		  term_stringa(struct termp *,
				const char *, size_t);
static	int		  term_isopendelim(const char *, int);
static	int		  term_isclosedelim(const char *, int);
46 
47 
48 void *
49 ascii_alloc(void)
50 {
51 
52 	return(term_alloc(TERMENC_ASCII));
53 }
54 
55 
56 int
57 terminal_man(void *arg, const struct man *man)
58 {
59 	struct termp	*p;
60 
61 	p = (struct termp *)arg;
62 	if (NULL == p->symtab)
63 		p->symtab = term_ascii2htab();
64 
65 	return(man_run(p, man));
66 }
67 
68 
69 int
70 terminal_mdoc(void *arg, const struct mdoc *mdoc)
71 {
72 	struct termp	*p;
73 
74 	p = (struct termp *)arg;
75 	if (NULL == p->symtab)
76 		p->symtab = term_ascii2htab();
77 
78 	return(mdoc_run(p, mdoc));
79 }
80 
81 
/*
 * Public destructor: release the terminal state behind the opaque
 * pointer `arg'.
 */
void
terminal_free(void *arg)
{
	struct termp	*p = arg;

	term_free(p);
}
88 
89 
90 static void
91 term_free(struct termp *p)
92 {
93 
94 	if (p->buf)
95 		free(p->buf);
96 	if (TERMENC_ASCII == p->enc && p->symtab)
97 		term_asciifree(p->symtab);
98 
99 	free(p);
100 }
101 
102 
103 static struct termp *
104 term_alloc(enum termenc enc)
105 {
106 	struct termp *p;
107 
108 	if (NULL == (p = malloc(sizeof(struct termp))))
109 		err(1, "malloc");
110 	bzero(p, sizeof(struct termp));
111 	p->maxrmargin = 78;
112 	p->enc = enc;
113 	return(p);
114 }
115 
116 
/*
 * Return 1 if the `len'-byte word at `p' is exactly one closing
 * delimiter (one of . , ; : ? ! ) ] }), otherwise 0.  The word is
 * not dereferenced unless len is 1.
 */
static int
term_isclosedelim(const char *p, int len)
{
	static const char	 closers[] = ".,;:?!)]}";
	size_t			 k;

	if (len != 1)
		return(0);

	/* Stop short of the terminating NUL so that '\0' never matches. */
	for (k = 0; k < sizeof(closers) - 1; k++)
		if (closers[k] == *p)
			return(1);

	return(0);
}
149 
150 
/*
 * Return 1 if the `len'-byte word at `p' is exactly one opening
 * delimiter (one of ( [ {), otherwise 0.  The word is not
 * dereferenced unless len is 1.
 */
static int
term_isopendelim(const char *p, int len)
{

	if (len != 1)
		return(0);

	if ('(' == *p || '[' == *p || '{' == *p)
		return(1);

	return(0);
}
171 
172 
173 /*
174  * Flush a line of text.  A "line" is loosely defined as being something
175  * that should be followed by a newline, regardless of whether it's
176  * broken apart by newlines getting there.  A line can also be a
177  * fragment of a columnar list.
178  *
179  * Specifically, a line is whatever's in p->buf of length p->col, which
180  * is zeroed after this function returns.
181  *
182  * The variables TERMP_NOLPAD, TERMP_LITERAL and TERMP_NOBREAK are of
183  * critical importance here.  Their behaviour follows:
184  *
185  *  - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
186  *    offset value.  This is useful when doing columnar lists where the
187  *    prior column has right-padded.
188  *
189  *  - TERMP_NOBREAK: this is the most important and is used when making
190  *    columns.  In short: don't print a newline and instead pad to the
191  *    right margin.  Used in conjunction with TERMP_NOLPAD.
192  *
193  *  - TERMP_NONOBREAK: don't newline when TERMP_NOBREAK is specified.
194  *
195  *  In-line line breaking:
196  *
197  *  If TERMP_NOBREAK is specified and the line overruns the right
198  *  margin, it will break and pad-right to the right margin after
199  *  writing.  If maxrmargin is violated, it will break and continue
200  *  writing from the right-margin, which will lead to the above
201  *  scenario upon exit.
202  *
203  *  Otherwise, the line will break at the right margin.  Extremely long
204  *  lines will cause the system to emit a warning (TODO: hyphenate, if
205  *  possible).
206  */
207 void
208 term_flushln(struct termp *p)
209 {
210 	int		 i, j;
211 	size_t		 vsz, vis, maxvis, mmax, bp;
212 
213 	/*
214 	 * First, establish the maximum columns of "visible" content.
215 	 * This is usually the difference between the right-margin and
216 	 * an indentation, but can be, for tagged lists or columns, a
217 	 * small set of values.
218 	 */
219 
220 	assert(p->offset < p->rmargin);
221 	maxvis = p->rmargin - p->offset;
222 	mmax = p->maxrmargin - p->offset;
223 	bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
224 	vis = 0;
225 
226 	/*
227 	 * If in the standard case (left-justified), then begin with our
228 	 * indentation, otherwise (columns, etc.) just start spitting
229 	 * out text.
230 	 */
231 
232 	if ( ! (p->flags & TERMP_NOLPAD))
233 		/* LINTED */
234 		for (j = 0; j < (int)p->offset; j++)
235 			putchar(' ');
236 
237 	for (i = 0; i < (int)p->col; i++) {
238 		/*
239 		 * Count up visible word characters.  Control sequences
240 		 * (starting with the CSI) aren't counted.  A space
241 		 * generates a non-printing word, which is valid (the
242 		 * space is printed according to regular spacing rules).
243 		 */
244 
245 		/* LINTED */
246 		for (j = i, vsz = 0; j < (int)p->col; j++) {
247 			if (' ' == p->buf[j])
248 				break;
249 			else if (8 == p->buf[j])
250 				j += 1;
251 			else
252 				vsz++;
253 		}
254 
255 		/*
256 		 * Do line-breaking.  If we're greater than our
257 		 * break-point and already in-line, break to the next
258 		 * line and start writing.  If we're at the line start,
259 		 * then write out the word (TODO: hyphenate) and break
260 		 * in a subsequent loop invocation.
261 		 */
262 
263 		if ( ! (TERMP_NOBREAK & p->flags)) {
264 			if (vis && vis + vsz > bp) {
265 				putchar('\n');
266 				for (j = 0; j < (int)p->offset; j++)
267 					putchar(' ');
268 				vis = 0;
269 			}
270 		} else if (vis && vis + vsz > bp) {
271 			putchar('\n');
272 			for (j = 0; j < (int)p->rmargin; j++)
273 				putchar(' ');
274 			vis = p->rmargin - p->offset;
275 		}
276 
277 		/*
278 		 * Write out the word and a trailing space.  Omit the
279 		 * space if we're the last word in the line or beyond
280 		 * our breakpoint.
281 		 */
282 
283 		for ( ; i < (int)p->col; i++) {
284 			if (' ' == p->buf[i])
285 				break;
286 			putchar(p->buf[i]);
287 		}
288 		vis += vsz;
289 		if (i < (int)p->col && vis <= bp) {
290 			putchar(' ');
291 			vis++;
292 		}
293 	}
294 
295 	/*
296 	 * If we've overstepped our maximum visible no-break space, then
297 	 * cause a newline and offset at the right margin.
298 	 */
299 
300 	if ((TERMP_NOBREAK & p->flags) && vis >= maxvis) {
301 		if ( ! (TERMP_NONOBREAK & p->flags)) {
302 			putchar('\n');
303 			for (i = 0; i < (int)p->rmargin; i++)
304 				putchar(' ');
305 		}
306 		p->col = 0;
307 		return;
308 	}
309 
310 	/*
311 	 * If we're not to right-marginalise it (newline), then instead
312 	 * pad to the right margin and stay off.
313 	 */
314 
315 	if (p->flags & TERMP_NOBREAK) {
316 		if ( ! (TERMP_NONOBREAK & p->flags))
317 			for ( ; vis < maxvis; vis++)
318 				putchar(' ');
319 	} else
320 		putchar('\n');
321 
322 	p->col = 0;
323 }
324 
325 
326 /*
327  * A newline only breaks an existing line; it won't assert vertical
328  * space.  All data in the output buffer is flushed prior to the newline
329  * assertion.
330  */
331 void
332 term_newln(struct termp *p)
333 {
334 
335 	p->flags |= TERMP_NOSPACE;
336 	if (0 == p->col) {
337 		p->flags &= ~TERMP_NOLPAD;
338 		return;
339 	}
340 	term_flushln(p);
341 	p->flags &= ~TERMP_NOLPAD;
342 }
343 
344 
/*
 * Produce one vertical space: finish the pending line by way of
 * term_newln(), then write a bare newline.  Calls are not coalesced,
 * so every invocation writes its own newline.
 */
void
term_vspace(struct termp *p)
{

	term_newln(p);
	(void)putchar('\n');
}
358 
359 
360 /*
361  * Break apart a word into "pwords" (partial-words, usually from
362  * breaking up a phrase into individual words) and, eventually, put them
363  * into the output buffer.  If we're a literal word, then don't break up
364  * the word and put it verbatim into the output buffer.
365  */
366 void
367 term_word(struct termp *p, const char *word)
368 {
369 	int 		 i, j, len;
370 
371 	len = (int)strlen(word);
372 
373 	if (p->flags & TERMP_LITERAL) {
374 		term_pword(p, word, len);
375 		return;
376 	}
377 
378 	/* LINTED */
379 	for (j = i = 0; i < len; i++) {
380 		if (' ' != word[i]) {
381 			j++;
382 			continue;
383 		}
384 
385 		/* Escaped spaces don't delimit... */
386 		if (i && ' ' == word[i] && '\\' == word[i - 1]) {
387 			j++;
388 			continue;
389 		}
390 
391 		if (0 == j)
392 			continue;
393 		assert(i >= j);
394 		term_pword(p, &word[i - j], j);
395 		j = 0;
396 	}
397 	if (j > 0) {
398 		assert(i >= j);
399 		term_pword(p, &word[i - j], j);
400 	}
401 }
402 
403 
404 /*
405  * Determine the symbol indicated by an escape sequences, that is, one
406  * starting with a backslash.  Once done, we pass this value into the
407  * output buffer by way of the symbol table.
408  */
409 static void
410 term_nescape(struct termp *p, const char *word, size_t len)
411 {
412 	const char	*rhs;
413 	size_t		 sz;
414 
415 	if (NULL == (rhs = term_a2ascii(p->symtab, word, len, &sz)))
416 		return;
417 	term_stringa(p, rhs, sz);
418 }
419 
420 
421 /*
422  * Handle an escape sequence: determine its length and pass it to the
423  * escape-symbol look table.  Note that we assume mdoc(3) has validated
424  * the escape sequence (we assert upon badly-formed escape sequences).
425  */
426 static void
427 term_pescape(struct termp *p, const char *word, int *i, int len)
428 {
429 	int		 j;
430 
431 	if (++(*i) >= len)
432 		return;
433 
434 	if ('(' == word[*i]) {
435 		(*i)++;
436 		if (*i + 1 >= len)
437 			return;
438 
439 		term_nescape(p, &word[*i], 2);
440 		(*i)++;
441 		return;
442 
443 	} else if ('*' == word[*i]) {
444 		(*i)++;
445 		if (*i >= len)
446 			return;
447 
448 		switch (word[*i]) {
449 		case ('('):
450 			(*i)++;
451 			if (*i + 1 >= len)
452 				return;
453 
454 			term_nescape(p, &word[*i], 2);
455 			(*i)++;
456 			return;
457 		case ('['):
458 			break;
459 		default:
460 			term_nescape(p, &word[*i], 1);
461 			return;
462 		}
463 
464 	} else if ('f' == word[*i]) {
465 		(*i)++;
466 		if (*i >= len)
467 			return;
468 		switch (word[*i]) {
469 		case ('B'):
470 			p->flags |= TERMP_BOLD;
471 			break;
472 		case ('I'):
473 			p->flags |= TERMP_UNDER;
474 			break;
475 		case ('P'):
476 			/* FALLTHROUGH */
477 		case ('R'):
478 			p->flags &= ~TERMP_STYLE;
479 			break;
480 		default:
481 			break;
482 		}
483 		return;
484 
485 	} else if ('[' != word[*i]) {
486 		term_nescape(p, &word[*i], 1);
487 		return;
488 	}
489 
490 	(*i)++;
491 	for (j = 0; word[*i] && ']' != word[*i]; (*i)++, j++)
492 		/* Loop... */ ;
493 
494 	if (0 == word[*i])
495 		return;
496 
497 	term_nescape(p, &word[*i - j], (size_t)j);
498 }
499 
500 
501 /*
502  * Handle pwords, partial words, which may be either a single word or a
503  * phrase that cannot be broken down (such as a literal string).  This
504  * handles word styling.
505  */
506 static void
507 term_pword(struct termp *p, const char *word, int len)
508 {
509 	int		 i;
510 
511 	if (term_isclosedelim(word, len))
512 		if ( ! (TERMP_IGNDELIM & p->flags))
513 			p->flags |= TERMP_NOSPACE;
514 
515 	if ( ! (TERMP_NOSPACE & p->flags))
516 		term_chara(p, ' ');
517 
518 	if ( ! (p->flags & TERMP_NONOSPACE))
519 		p->flags &= ~TERMP_NOSPACE;
520 
521 	/*
522 	 * If ANSI (word-length styling), then apply our style now,
523 	 * before the word.
524 	 */
525 
526 	for (i = 0; i < len; i++) {
527 		if ('\\' == word[i]) {
528 			term_pescape(p, word, &i, len);
529 			continue;
530 		}
531 
532 		if (TERMP_STYLE & p->flags) {
533 			if (TERMP_BOLD & p->flags) {
534 				term_chara(p, word[i]);
535 				term_chara(p, 8);
536 			}
537 			if (TERMP_UNDER & p->flags) {
538 				term_chara(p, '_');
539 				term_chara(p, 8);
540 			}
541 		}
542 
543 		term_chara(p, word[i]);
544 	}
545 
546 	if (term_isopendelim(word, len))
547 		p->flags |= TERMP_NOSPACE;
548 }
549 
550 
551 /*
552  * Like term_chara() but for arbitrary-length buffers.  Resize the
553  * buffer by a factor of two (if the buffer is less than that) or the
554  * buffer's size.
555  */
556 static void
557 term_stringa(struct termp *p, const char *c, size_t sz)
558 {
559 	size_t		 s;
560 
561 	if (0 == sz)
562 		return;
563 
564 	assert(c);
565 	if (p->col + sz >= p->maxcols) {
566 		if (0 == p->maxcols)
567 			p->maxcols = 256;
568 		s = sz > p->maxcols * 2 ? sz : p->maxcols * 2;
569 		p->buf = realloc(p->buf, s);
570 		if (NULL == p->buf)
571 			err(1, "realloc");
572 		p->maxcols = s;
573 	}
574 
575 	(void)memcpy(&p->buf[(int)p->col], c, sz);
576 	p->col += sz;
577 }
578 
579 
580 /*
581  * Insert a single character into the line-buffer.  If the buffer's
582  * space is exceeded, then allocate more space by doubling the buffer
583  * size.
584  */
585 static void
586 term_chara(struct termp *p, char c)
587 {
588 	size_t		 s;
589 
590 	if (p->col + 1 >= p->maxcols) {
591 		if (0 == p->maxcols)
592 			p->maxcols = 256;
593 		s = p->maxcols * 2;
594 		p->buf = realloc(p->buf, s);
595 		if (NULL == p->buf)
596 			err(1, "realloc");
597 		p->maxcols = s;
598 	}
599 	p->buf[(int)(p->col)++] = c;
600 }
601 
602