xref: /openbsd-src/usr.bin/mandoc/tbl_layout.c (revision be38755c412cc72cb8d40f51ea70c9893196afff)
1 /*	$Id: tbl_layout.c,v 1.10 2011/09/18 15:54:48 schwarze Exp $ */
2 /*
3  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <assert.h>
18 #include <ctype.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <time.h>
22 
23 #include "mandoc.h"
24 #include "libmandoc.h"
25 #include "libroff.h"
26 
27 struct	tbl_phrase {
28 	char		 name;
29 	enum tbl_cellt	 key;
30 };
31 
32 /*
33  * FIXME: we can make this parse a lot nicer by, when an error is
34  * encountered in a layout key, bailing to the next key (i.e. to the
35  * next whitespace then continuing).
36  */
37 
38 #define	KEYS_MAX	 11
39 
40 static	const struct tbl_phrase keys[KEYS_MAX] = {
41 	{ 'c',		 TBL_CELL_CENTRE },
42 	{ 'r',		 TBL_CELL_RIGHT },
43 	{ 'l',		 TBL_CELL_LEFT },
44 	{ 'n',		 TBL_CELL_NUMBER },
45 	{ 's',		 TBL_CELL_SPAN },
46 	{ 'a',		 TBL_CELL_LONG },
47 	{ '^',		 TBL_CELL_DOWN },
48 	{ '-',		 TBL_CELL_HORIZ },
49 	{ '_',		 TBL_CELL_HORIZ },
50 	{ '=',		 TBL_CELL_DHORIZ },
51 	{ '|',		 TBL_CELL_VERT }
52 };
53 
/* Forward declarations of the file-local layout parsing helpers. */
static	int		 mods(struct tbl_node *tbl, struct tbl_cell *cp,
				int ln, const char *p, int *pos);
static	int		 cell(struct tbl_node *tbl, struct tbl_row *rp,
				int ln, const char *p, int *pos);
static	void		 row(struct tbl_node *tbl, int ln,
				const char *p, int *pos);
static	struct tbl_cell *cell_alloc(struct tbl_node *tbl,
				struct tbl_row *rp, enum tbl_cellt pos);
static	void		 head_adjust(const struct tbl_cell *cellp,
				struct tbl_head *head);
63 
64 static int
65 mods(struct tbl_node *tbl, struct tbl_cell *cp,
66 		int ln, const char *p, int *pos)
67 {
68 	char		 buf[5];
69 	int		 i;
70 
71 	/* Not all types accept modifiers. */
72 
73 	switch (cp->pos) {
74 	case (TBL_CELL_DOWN):
75 		/* FALLTHROUGH */
76 	case (TBL_CELL_HORIZ):
77 		/* FALLTHROUGH */
78 	case (TBL_CELL_DHORIZ):
79 		/* FALLTHROUGH */
80 	case (TBL_CELL_VERT):
81 		/* FALLTHROUGH */
82 	case (TBL_CELL_DVERT):
83 		return(1);
84 	default:
85 		break;
86 	}
87 
88 mod:
89 	/*
90 	 * XXX: since, at least for now, modifiers are non-conflicting
91 	 * (are separable by value, regardless of position), we let
92 	 * modifiers come in any order.  The existing tbl doesn't let
93 	 * this happen.
94 	 */
95 	switch (p[*pos]) {
96 	case ('\0'):
97 		/* FALLTHROUGH */
98 	case (' '):
99 		/* FALLTHROUGH */
100 	case ('\t'):
101 		/* FALLTHROUGH */
102 	case (','):
103 		/* FALLTHROUGH */
104 	case ('.'):
105 		return(1);
106 	default:
107 		break;
108 	}
109 
110 	/* Throw away parenthesised expression. */
111 
112 	if ('(' == p[*pos]) {
113 		(*pos)++;
114 		while (p[*pos] && ')' != p[*pos])
115 			(*pos)++;
116 		if (')' == p[*pos]) {
117 			(*pos)++;
118 			goto mod;
119 		}
120 		mandoc_msg(MANDOCERR_TBLLAYOUT,
121 				tbl->parse, ln, *pos, NULL);
122 		return(0);
123 	}
124 
125 	/* Parse numerical spacing from modifier string. */
126 
127 	if (isdigit((unsigned char)p[*pos])) {
128 		for (i = 0; i < 4; i++) {
129 			if ( ! isdigit((unsigned char)p[*pos + i]))
130 				break;
131 			buf[i] = p[*pos + i];
132 		}
133 		buf[i] = '\0';
134 
135 		/* No greater than 4 digits. */
136 
137 		if (4 == i) {
138 			mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
139 					ln, *pos, NULL);
140 			return(0);
141 		}
142 
143 		*pos += i;
144 		cp->spacing = (size_t)atoi(buf);
145 
146 		goto mod;
147 		/* NOTREACHED */
148 	}
149 
150 	/* TODO: GNU has many more extensions. */
151 
152 	switch (tolower((unsigned char)p[(*pos)++])) {
153 	case ('z'):
154 		cp->flags |= TBL_CELL_WIGN;
155 		goto mod;
156 	case ('u'):
157 		cp->flags |= TBL_CELL_UP;
158 		goto mod;
159 	case ('e'):
160 		cp->flags |= TBL_CELL_EQUAL;
161 		goto mod;
162 	case ('t'):
163 		cp->flags |= TBL_CELL_TALIGN;
164 		goto mod;
165 	case ('d'):
166 		cp->flags |= TBL_CELL_BALIGN;
167 		goto mod;
168 	case ('w'):  /* XXX for now, ignore minimal column width */
169 		goto mod;
170 	case ('f'):
171 		break;
172 	case ('r'):
173 		/* FALLTHROUGH */
174 	case ('b'):
175 		/* FALLTHROUGH */
176 	case ('i'):
177 		(*pos)--;
178 		break;
179 	default:
180 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
181 				ln, *pos - 1, NULL);
182 		return(0);
183 	}
184 
185 	switch (tolower((unsigned char)p[(*pos)++])) {
186 	case ('3'):
187 		/* FALLTHROUGH */
188 	case ('b'):
189 		cp->flags |= TBL_CELL_BOLD;
190 		goto mod;
191 	case ('2'):
192 		/* FALLTHROUGH */
193 	case ('i'):
194 		cp->flags |= TBL_CELL_ITALIC;
195 		goto mod;
196 	case ('1'):
197 		/* FALLTHROUGH */
198 	case ('r'):
199 		goto mod;
200 	default:
201 		break;
202 	}
203 
204 	mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
205 			ln, *pos - 1, NULL);
206 	return(0);
207 }
208 
209 static int
210 cell(struct tbl_node *tbl, struct tbl_row *rp,
211 		int ln, const char *p, int *pos)
212 {
213 	int		 i;
214 	enum tbl_cellt	 c;
215 
216 	/* Parse the column position (`r', `R', `|', ...). */
217 
218 	for (i = 0; i < KEYS_MAX; i++)
219 		if (tolower((unsigned char)p[*pos]) == keys[i].name)
220 			break;
221 
222 	if (KEYS_MAX == i) {
223 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
224 				ln, *pos, NULL);
225 		return(0);
226 	}
227 
228 	c = keys[i].key;
229 
230 	/*
231 	 * If a span cell is found first, raise a warning and abort the
232 	 * parse.  If a span cell is found and the last layout element
233 	 * isn't a "normal" layout, bail.
234 	 *
235 	 * FIXME: recover from this somehow?
236 	 */
237 
238 	if (TBL_CELL_SPAN == c) {
239 		if (NULL == rp->first) {
240 			mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
241 					ln, *pos, NULL);
242 			return(0);
243 		} else if (rp->last)
244 			switch (rp->last->pos) {
245 			case (TBL_CELL_VERT):
246 			case (TBL_CELL_DVERT):
247 			case (TBL_CELL_HORIZ):
248 			case (TBL_CELL_DHORIZ):
249 				mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
250 						ln, *pos, NULL);
251 				return(0);
252 			default:
253 				break;
254 			}
255 	}
256 
257 	/*
258 	 * If a vertical spanner is found, we may not be in the first
259 	 * row.
260 	 */
261 
262 	if (TBL_CELL_DOWN == c && rp == tbl->first_row) {
263 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos, NULL);
264 		return(0);
265 	}
266 
267 	(*pos)++;
268 
269 	/* Extra check for the double-vertical. */
270 
271 	if (TBL_CELL_VERT == c && '|' == p[*pos]) {
272 		(*pos)++;
273 		c = TBL_CELL_DVERT;
274 	}
275 
276 	/* Disallow adjacent spacers. */
277 
278 	if (rp->last && (TBL_CELL_VERT == c || TBL_CELL_DVERT == c) &&
279 			(TBL_CELL_VERT == rp->last->pos ||
280 			 TBL_CELL_DVERT == rp->last->pos)) {
281 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos - 1, NULL);
282 		return(0);
283 	}
284 
285 	/* Allocate cell then parse its modifiers. */
286 
287 	return(mods(tbl, cell_alloc(tbl, rp, c), ln, p, pos));
288 }
289 
290 
291 static void
292 row(struct tbl_node *tbl, int ln, const char *p, int *pos)
293 {
294 	struct tbl_row	*rp;
295 
296 row:	/*
297 	 * EBNF describing this section:
298 	 *
299 	 * row		::= row_list [:space:]* [.]?[\n]
300 	 * row_list	::= [:space:]* row_elem row_tail
301 	 * row_tail	::= [:space:]*[,] row_list |
302 	 *                  epsilon
303 	 * row_elem	::= [\t\ ]*[:alpha:]+
304 	 */
305 
306 	rp = mandoc_calloc(1, sizeof(struct tbl_row));
307 	if (tbl->last_row) {
308 		tbl->last_row->next = rp;
309 		tbl->last_row = rp;
310 	} else
311 		tbl->last_row = tbl->first_row = rp;
312 
313 cell:
314 	while (isspace((unsigned char)p[*pos]))
315 		(*pos)++;
316 
317 	/* Safely exit layout context. */
318 
319 	if ('.' == p[*pos]) {
320 		tbl->part = TBL_PART_DATA;
321 		if (NULL == tbl->first_row)
322 			mandoc_msg(MANDOCERR_TBLNOLAYOUT, tbl->parse,
323 					ln, *pos, NULL);
324 		(*pos)++;
325 		return;
326 	}
327 
328 	/* End (and possibly restart) a row. */
329 
330 	if (',' == p[*pos]) {
331 		(*pos)++;
332 		goto row;
333 	} else if ('\0' == p[*pos])
334 		return;
335 
336 	if ( ! cell(tbl, rp, ln, p, pos))
337 		return;
338 
339 	goto cell;
340 	/* NOTREACHED */
341 }
342 
/*
 * Entry point: parse layout line p (input line number ln) into tbl.
 * Parse problems are reported by the helpers through mandoc_msg();
 * this function itself always returns 1.
 */
int
tbl_layout(struct tbl_node *tbl, int ln, const char *p)
{
	int		 offs = 0;

	row(tbl, ln, p, &offs);

	/* Always succeed. */
	return(1);
}
354 
355 static struct tbl_cell *
356 cell_alloc(struct tbl_node *tbl, struct tbl_row *rp, enum tbl_cellt pos)
357 {
358 	struct tbl_cell	*p, *pp;
359 	struct tbl_head	*h, *hp;
360 
361 	p = mandoc_calloc(1, sizeof(struct tbl_cell));
362 
363 	if (NULL != (pp = rp->last)) {
364 		rp->last->next = p;
365 		rp->last = p;
366 	} else
367 		rp->last = rp->first = p;
368 
369 	p->pos = pos;
370 
371 	/*
372 	 * This is a little bit complicated.  Here we determine the
373 	 * header the corresponds to a cell.  We add headers dynamically
374 	 * when need be or re-use them, otherwise.  As an example, given
375 	 * the following:
376 	 *
377 	 * 	1  c || l
378 	 * 	2  | c | l
379 	 * 	3  l l
380 	 * 	3  || c | l |.
381 	 *
382 	 * We first add the new headers (as there are none) in (1); then
383 	 * in (2) we insert the first spanner (as it doesn't match up
384 	 * with the header); then we re-use the prior data headers,
385 	 * skipping over the spanners; then we re-use everything and add
386 	 * a last spanner.  Note that VERT headers are made into DVERT
387 	 * ones.
388 	 */
389 
390 	h = pp ? pp->head->next : tbl->first_head;
391 
392 	if (h) {
393 		/* Re-use data header. */
394 		if (TBL_HEAD_DATA == h->pos &&
395 				(TBL_CELL_VERT != p->pos &&
396 				 TBL_CELL_DVERT != p->pos)) {
397 			p->head = h;
398 			return(p);
399 		}
400 
401 		/* Re-use spanner header. */
402 		if (TBL_HEAD_DATA != h->pos &&
403 				(TBL_CELL_VERT == p->pos ||
404 				 TBL_CELL_DVERT == p->pos)) {
405 			head_adjust(p, h);
406 			p->head = h;
407 			return(p);
408 		}
409 
410 		/* Right-shift headers with a new spanner. */
411 		if (TBL_HEAD_DATA == h->pos &&
412 				(TBL_CELL_VERT == p->pos ||
413 				 TBL_CELL_DVERT == p->pos)) {
414 			hp = mandoc_calloc(1, sizeof(struct tbl_head));
415 			hp->ident = tbl->opts.cols++;
416 			hp->prev = h->prev;
417 			if (h->prev)
418 				h->prev->next = hp;
419 			if (h == tbl->first_head)
420 				tbl->first_head = hp;
421 			h->prev = hp;
422 			hp->next = h;
423 			head_adjust(p, hp);
424 			p->head = hp;
425 			return(p);
426 		}
427 
428 		if (NULL != (h = h->next)) {
429 			head_adjust(p, h);
430 			p->head = h;
431 			return(p);
432 		}
433 
434 		/* Fall through to default case... */
435 	}
436 
437 	hp = mandoc_calloc(1, sizeof(struct tbl_head));
438 	hp->ident = tbl->opts.cols++;
439 
440 	if (tbl->last_head) {
441 		hp->prev = tbl->last_head;
442 		tbl->last_head->next = hp;
443 		tbl->last_head = hp;
444 	} else
445 		tbl->last_head = tbl->first_head = hp;
446 
447 	head_adjust(p, hp);
448 	p->head = hp;
449 	return(p);
450 }
451 
452 static void
453 head_adjust(const struct tbl_cell *cellp, struct tbl_head *head)
454 {
455 	if (TBL_CELL_VERT != cellp->pos &&
456 			TBL_CELL_DVERT != cellp->pos) {
457 		head->pos = TBL_HEAD_DATA;
458 		return;
459 	}
460 
461 	if (TBL_CELL_VERT == cellp->pos)
462 		if (TBL_HEAD_DVERT != head->pos)
463 			head->pos = TBL_HEAD_VERT;
464 
465 	if (TBL_CELL_DVERT == cellp->pos)
466 		head->pos = TBL_HEAD_DVERT;
467 }
468 
469