xref: /openbsd-src/usr.bin/mandoc/tbl_layout.c (revision e5157e49389faebcb42b7237d55fbf096d9c2523)
1 /*	$OpenBSD: tbl_layout.c,v 1.16 2014/10/14 02:16:02 schwarze Exp $ */
2 /*
3  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2012, 2014 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <ctype.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <time.h>
24 
25 #include "mandoc.h"
26 #include "mandoc_aux.h"
27 #include "libmandoc.h"
28 #include "libroff.h"
29 
30 struct	tbl_phrase {
31 	char		 name;
32 	enum tbl_cellt	 key;
33 };
34 
35 /*
36  * FIXME: we can make this parse a lot nicer by, when an error is
37  * encountered in a layout key, bailing to the next key (i.e. to the
38  * next whitespace then continuing).
39  */
40 
41 #define	KEYS_MAX	 11
42 
43 static	const struct tbl_phrase keys[KEYS_MAX] = {
44 	{ 'c',		 TBL_CELL_CENTRE },
45 	{ 'r',		 TBL_CELL_RIGHT },
46 	{ 'l',		 TBL_CELL_LEFT },
47 	{ 'n',		 TBL_CELL_NUMBER },
48 	{ 's',		 TBL_CELL_SPAN },
49 	{ 'a',		 TBL_CELL_LONG },
50 	{ '^',		 TBL_CELL_DOWN },
51 	{ '-',		 TBL_CELL_HORIZ },
52 	{ '_',		 TBL_CELL_HORIZ },
53 	{ '=',		 TBL_CELL_DHORIZ }
54 };
55 
/* Forward declarations of the file-local layout parsing helpers. */
static	int		 mods(struct tbl_node *tbl, struct tbl_cell *cp,
				int ln, const char *p, int *pos);
static	int		 cell(struct tbl_node *tbl, struct tbl_row *rp,
				int ln, const char *p, int *pos);
static	void		 row(struct tbl_node *tbl, int ln,
				const char *p, int *pos);
static	struct tbl_cell *cell_alloc(struct tbl_node *tbl,
				struct tbl_row *rp, enum tbl_cellt pos,
				int vert);
63 
64 
65 static int
66 mods(struct tbl_node *tbl, struct tbl_cell *cp,
67 		int ln, const char *p, int *pos)
68 {
69 	char		 buf[5];
70 	int		 i;
71 
72 	/* Not all types accept modifiers. */
73 
74 	switch (cp->pos) {
75 	case TBL_CELL_DOWN:
76 		/* FALLTHROUGH */
77 	case TBL_CELL_HORIZ:
78 		/* FALLTHROUGH */
79 	case TBL_CELL_DHORIZ:
80 		return(1);
81 	default:
82 		break;
83 	}
84 
85 mod:
86 	/*
87 	 * XXX: since, at least for now, modifiers are non-conflicting
88 	 * (are separable by value, regardless of position), we let
89 	 * modifiers come in any order.  The existing tbl doesn't let
90 	 * this happen.
91 	 */
92 	switch (p[*pos]) {
93 	case '\0':
94 		/* FALLTHROUGH */
95 	case ' ':
96 		/* FALLTHROUGH */
97 	case '\t':
98 		/* FALLTHROUGH */
99 	case ',':
100 		/* FALLTHROUGH */
101 	case '.':
102 		/* FALLTHROUGH */
103 	case '|':
104 		return(1);
105 	default:
106 		break;
107 	}
108 
109 	/* Throw away parenthesised expression. */
110 
111 	if ('(' == p[*pos]) {
112 		(*pos)++;
113 		while (p[*pos] && ')' != p[*pos])
114 			(*pos)++;
115 		if (')' == p[*pos]) {
116 			(*pos)++;
117 			goto mod;
118 		}
119 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
120 		    ln, *pos, NULL);
121 		return(0);
122 	}
123 
124 	/* Parse numerical spacing from modifier string. */
125 
126 	if (isdigit((unsigned char)p[*pos])) {
127 		for (i = 0; i < 4; i++) {
128 			if ( ! isdigit((unsigned char)p[*pos + i]))
129 				break;
130 			buf[i] = p[*pos + i];
131 		}
132 		buf[i] = '\0';
133 
134 		/* No greater than 4 digits. */
135 
136 		if (4 == i) {
137 			mandoc_msg(MANDOCERR_TBLLAYOUT,
138 			    tbl->parse, ln, *pos, NULL);
139 			return(0);
140 		}
141 
142 		*pos += i;
143 		cp->spacing = (size_t)atoi(buf);
144 
145 		goto mod;
146 		/* NOTREACHED */
147 	}
148 
149 	/* TODO: GNU has many more extensions. */
150 
151 	switch (tolower((unsigned char)p[(*pos)++])) {
152 	case 'z':
153 		cp->flags |= TBL_CELL_WIGN;
154 		goto mod;
155 	case 'u':
156 		cp->flags |= TBL_CELL_UP;
157 		goto mod;
158 	case 'e':
159 		cp->flags |= TBL_CELL_EQUAL;
160 		goto mod;
161 	case 't':
162 		cp->flags |= TBL_CELL_TALIGN;
163 		goto mod;
164 	case 'd':
165 		cp->flags |= TBL_CELL_BALIGN;
166 		goto mod;
167 	case 'w':  /* XXX for now, ignore minimal column width */
168 		goto mod;
169 	case 'x':
170 		cp->flags |= TBL_CELL_WMAX;
171 		goto mod;
172 	case 'f':
173 		break;
174 	case 'r':
175 		/* FALLTHROUGH */
176 	case 'b':
177 		/* FALLTHROUGH */
178 	case 'i':
179 		(*pos)--;
180 		break;
181 	default:
182 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
183 		    ln, *pos - 1, NULL);
184 		return(0);
185 	}
186 
187 	switch (tolower((unsigned char)p[(*pos)++])) {
188 	case '3':
189 		/* FALLTHROUGH */
190 	case 'b':
191 		cp->flags |= TBL_CELL_BOLD;
192 		goto mod;
193 	case '2':
194 		/* FALLTHROUGH */
195 	case 'i':
196 		cp->flags |= TBL_CELL_ITALIC;
197 		goto mod;
198 	case '1':
199 		/* FALLTHROUGH */
200 	case 'r':
201 		goto mod;
202 	default:
203 		break;
204 	}
205 	if (isalnum((unsigned char)p[*pos - 1])) {
206 		mandoc_vmsg(MANDOCERR_FT_BAD, tbl->parse,
207 		    ln, *pos - 1, "TS f%c", p[*pos - 1]);
208 		goto mod;
209 	}
210 
211 	mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
212 	    ln, *pos - 1, NULL);
213 	return(0);
214 }
215 
216 static int
217 cell(struct tbl_node *tbl, struct tbl_row *rp,
218 		int ln, const char *p, int *pos)
219 {
220 	int		 vert, i;
221 	enum tbl_cellt	 c;
222 
223 	/* Handle vertical lines. */
224 
225 	for (vert = 0; '|' == p[*pos]; ++*pos)
226 		vert++;
227 	while (' ' == p[*pos])
228 		(*pos)++;
229 
230 	/* Handle trailing vertical lines */
231 
232 	if ('.' == p[*pos] || '\0' == p[*pos]) {
233 		rp->vert = vert;
234 		return(1);
235 	}
236 
237 	/* Parse the column position (`c', `l', `r', ...). */
238 
239 	for (i = 0; i < KEYS_MAX; i++)
240 		if (tolower((unsigned char)p[*pos]) == keys[i].name)
241 			break;
242 
243 	if (KEYS_MAX == i) {
244 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
245 		    ln, *pos, NULL);
246 		return(0);
247 	}
248 
249 	c = keys[i].key;
250 
251 	/*
252 	 * If a span cell is found first, raise a warning and abort the
253 	 * parse.  If a span cell is found and the last layout element
254 	 * isn't a "normal" layout, bail.
255 	 *
256 	 * FIXME: recover from this somehow?
257 	 */
258 
259 	if (TBL_CELL_SPAN == c) {
260 		if (NULL == rp->first) {
261 			mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
262 			    ln, *pos, NULL);
263 			return(0);
264 		} else if (rp->last)
265 			switch (rp->last->pos) {
266 			case TBL_CELL_HORIZ:
267 				/* FALLTHROUGH */
268 			case TBL_CELL_DHORIZ:
269 				mandoc_msg(MANDOCERR_TBLLAYOUT,
270 				    tbl->parse, ln, *pos, NULL);
271 				return(0);
272 			default:
273 				break;
274 			}
275 	}
276 
277 	/*
278 	 * If a vertical spanner is found, we may not be in the first
279 	 * row.
280 	 */
281 
282 	if (TBL_CELL_DOWN == c && rp == tbl->first_row) {
283 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos, NULL);
284 		return(0);
285 	}
286 
287 	(*pos)++;
288 
289 	/* Disallow adjacent spacers. */
290 
291 	if (vert > 2) {
292 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos - 1, NULL);
293 		return(0);
294 	}
295 
296 	/* Allocate cell then parse its modifiers. */
297 
298 	return(mods(tbl, cell_alloc(tbl, rp, c, vert), ln, p, pos));
299 }
300 
301 static void
302 row(struct tbl_node *tbl, int ln, const char *p, int *pos)
303 {
304 	struct tbl_row	*rp;
305 
306 row:	/*
307 	 * EBNF describing this section:
308 	 *
309 	 * row		::= row_list [:space:]* [.]?[\n]
310 	 * row_list	::= [:space:]* row_elem row_tail
311 	 * row_tail	::= [:space:]*[,] row_list |
312 	 *                  epsilon
313 	 * row_elem	::= [\t\ ]*[:alpha:]+
314 	 */
315 
316 	rp = mandoc_calloc(1, sizeof(struct tbl_row));
317 	if (tbl->last_row)
318 		tbl->last_row->next = rp;
319 	else
320 		tbl->first_row = rp;
321 	tbl->last_row = rp;
322 
323 cell:
324 	while (isspace((unsigned char)p[*pos]))
325 		(*pos)++;
326 
327 	/* Safely exit layout context. */
328 
329 	if ('.' == p[*pos]) {
330 		tbl->part = TBL_PART_DATA;
331 		if (NULL == tbl->first_row)
332 			mandoc_msg(MANDOCERR_TBLNOLAYOUT,
333 			    tbl->parse, ln, *pos, NULL);
334 		(*pos)++;
335 		return;
336 	}
337 
338 	/* End (and possibly restart) a row. */
339 
340 	if (',' == p[*pos]) {
341 		(*pos)++;
342 		goto row;
343 	} else if ('\0' == p[*pos])
344 		return;
345 
346 	if ( ! cell(tbl, rp, ln, p, pos))
347 		return;
348 
349 	goto cell;
350 	/* NOTREACHED */
351 }
352 
/*
 * Entry point for one line of table layout: parse p from its first
 * character with row().  Problems are reported from within the
 * parser; the return value is always 1.
 */
int
tbl_layout(struct tbl_node *tbl, int ln, const char *p)
{
	int		 offset = 0;

	row(tbl, ln, p, &offset);

	/* Always succeed. */
	return(1);
}
364 
365 static struct tbl_cell *
366 cell_alloc(struct tbl_node *tbl, struct tbl_row *rp, enum tbl_cellt pos,
367 		int vert)
368 {
369 	struct tbl_cell	*p, *pp;
370 	struct tbl_head	*h, *hp;
371 
372 	p = mandoc_calloc(1, sizeof(struct tbl_cell));
373 
374 	if (NULL != (pp = rp->last)) {
375 		pp->next = p;
376 		h = pp->head->next;
377 	} else {
378 		rp->first = p;
379 		h = tbl->first_head;
380 	}
381 	rp->last = p;
382 
383 	p->pos = pos;
384 	p->vert = vert;
385 
386 	/* Re-use header. */
387 
388 	if (h) {
389 		p->head = h;
390 		return(p);
391 	}
392 
393 	hp = mandoc_calloc(1, sizeof(struct tbl_head));
394 	hp->ident = tbl->opts.cols++;
395 	hp->vert = vert;
396 
397 	if (tbl->last_head) {
398 		hp->prev = tbl->last_head;
399 		tbl->last_head->next = hp;
400 	} else
401 		tbl->first_head = hp;
402 	tbl->last_head = hp;
403 
404 	p->head = hp;
405 	return(p);
406 }
407