xref: /openbsd-src/usr.bin/mandoc/tbl_layout.c (revision 48950c12d106c85f315112191a0228d7b83b9510)
1 /*	$Id: tbl_layout.c,v 1.11 2012/05/26 20:03:34 schwarze Exp $ */
2 /*
3  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2012 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <assert.h>
19 #include <ctype.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <time.h>
23 
24 #include "mandoc.h"
25 #include "libmandoc.h"
26 #include "libroff.h"
27 
28 struct	tbl_phrase {
29 	char		 name;
30 	enum tbl_cellt	 key;
31 };
32 
33 /*
34  * FIXME: we can make this parse a lot nicer by, when an error is
35  * encountered in a layout key, bailing to the next key (i.e. to the
36  * next whitespace then continuing).
37  */
38 
39 #define	KEYS_MAX	 11
40 
41 static	const struct tbl_phrase keys[KEYS_MAX] = {
42 	{ 'c',		 TBL_CELL_CENTRE },
43 	{ 'r',		 TBL_CELL_RIGHT },
44 	{ 'l',		 TBL_CELL_LEFT },
45 	{ 'n',		 TBL_CELL_NUMBER },
46 	{ 's',		 TBL_CELL_SPAN },
47 	{ 'a',		 TBL_CELL_LONG },
48 	{ '^',		 TBL_CELL_DOWN },
49 	{ '-',		 TBL_CELL_HORIZ },
50 	{ '_',		 TBL_CELL_HORIZ },
51 	{ '=',		 TBL_CELL_DHORIZ }
52 };
53 
/*
 * Forward declarations of the file-local parser helpers.
 * In the parsing functions, ln is the input line number used for
 * messages, p is the line being parsed and *pos the current offset
 * into it.
 */
static	int		 mods(struct tbl_node *tbl, struct tbl_cell *cp,
				int ln, const char *p, int *pos);
static	int		 cell(struct tbl_node *tbl, struct tbl_row *rp,
				int ln, const char *p, int *pos);
static	void		 row(struct tbl_node *tbl, int ln,
				const char *p, int *pos);
static	struct tbl_cell *cell_alloc(struct tbl_node *tbl,
				struct tbl_row *rp, enum tbl_cellt pos,
				int vert);
61 
62 static int
63 mods(struct tbl_node *tbl, struct tbl_cell *cp,
64 		int ln, const char *p, int *pos)
65 {
66 	char		 buf[5];
67 	int		 i;
68 
69 	/* Not all types accept modifiers. */
70 
71 	switch (cp->pos) {
72 	case (TBL_CELL_DOWN):
73 		/* FALLTHROUGH */
74 	case (TBL_CELL_HORIZ):
75 		/* FALLTHROUGH */
76 	case (TBL_CELL_DHORIZ):
77 		return(1);
78 	default:
79 		break;
80 	}
81 
82 mod:
83 	/*
84 	 * XXX: since, at least for now, modifiers are non-conflicting
85 	 * (are separable by value, regardless of position), we let
86 	 * modifiers come in any order.  The existing tbl doesn't let
87 	 * this happen.
88 	 */
89 	switch (p[*pos]) {
90 	case ('\0'):
91 		/* FALLTHROUGH */
92 	case (' '):
93 		/* FALLTHROUGH */
94 	case ('\t'):
95 		/* FALLTHROUGH */
96 	case (','):
97 		/* FALLTHROUGH */
98 	case ('.'):
99 		return(1);
100 	default:
101 		break;
102 	}
103 
104 	/* Throw away parenthesised expression. */
105 
106 	if ('(' == p[*pos]) {
107 		(*pos)++;
108 		while (p[*pos] && ')' != p[*pos])
109 			(*pos)++;
110 		if (')' == p[*pos]) {
111 			(*pos)++;
112 			goto mod;
113 		}
114 		mandoc_msg(MANDOCERR_TBLLAYOUT,
115 				tbl->parse, ln, *pos, NULL);
116 		return(0);
117 	}
118 
119 	/* Parse numerical spacing from modifier string. */
120 
121 	if (isdigit((unsigned char)p[*pos])) {
122 		for (i = 0; i < 4; i++) {
123 			if ( ! isdigit((unsigned char)p[*pos + i]))
124 				break;
125 			buf[i] = p[*pos + i];
126 		}
127 		buf[i] = '\0';
128 
129 		/* No greater than 4 digits. */
130 
131 		if (4 == i) {
132 			mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
133 					ln, *pos, NULL);
134 			return(0);
135 		}
136 
137 		*pos += i;
138 		cp->spacing = (size_t)atoi(buf);
139 
140 		goto mod;
141 		/* NOTREACHED */
142 	}
143 
144 	/* TODO: GNU has many more extensions. */
145 
146 	switch (tolower((unsigned char)p[(*pos)++])) {
147 	case ('z'):
148 		cp->flags |= TBL_CELL_WIGN;
149 		goto mod;
150 	case ('u'):
151 		cp->flags |= TBL_CELL_UP;
152 		goto mod;
153 	case ('e'):
154 		cp->flags |= TBL_CELL_EQUAL;
155 		goto mod;
156 	case ('t'):
157 		cp->flags |= TBL_CELL_TALIGN;
158 		goto mod;
159 	case ('d'):
160 		cp->flags |= TBL_CELL_BALIGN;
161 		goto mod;
162 	case ('w'):  /* XXX for now, ignore minimal column width */
163 		goto mod;
164 	case ('f'):
165 		break;
166 	case ('r'):
167 		/* FALLTHROUGH */
168 	case ('b'):
169 		/* FALLTHROUGH */
170 	case ('i'):
171 		(*pos)--;
172 		break;
173 	default:
174 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
175 				ln, *pos - 1, NULL);
176 		return(0);
177 	}
178 
179 	switch (tolower((unsigned char)p[(*pos)++])) {
180 	case ('3'):
181 		/* FALLTHROUGH */
182 	case ('b'):
183 		cp->flags |= TBL_CELL_BOLD;
184 		goto mod;
185 	case ('2'):
186 		/* FALLTHROUGH */
187 	case ('i'):
188 		cp->flags |= TBL_CELL_ITALIC;
189 		goto mod;
190 	case ('1'):
191 		/* FALLTHROUGH */
192 	case ('r'):
193 		goto mod;
194 	default:
195 		break;
196 	}
197 
198 	mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
199 			ln, *pos - 1, NULL);
200 	return(0);
201 }
202 
203 static int
204 cell(struct tbl_node *tbl, struct tbl_row *rp,
205 		int ln, const char *p, int *pos)
206 {
207 	int		 vert, i;
208 	enum tbl_cellt	 c;
209 
210 	/* Handle vertical lines. */
211 
212 	for (vert = 0; '|' == p[*pos]; ++*pos)
213 		vert++;
214 	while (' ' == p[*pos])
215 		(*pos)++;
216 
217 	/* Parse the column position (`c', `l', `r', ...). */
218 
219 	for (i = 0; i < KEYS_MAX; i++)
220 		if (tolower((unsigned char)p[*pos]) == keys[i].name)
221 			break;
222 
223 	if (KEYS_MAX == i) {
224 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
225 				ln, *pos, NULL);
226 		return(0);
227 	}
228 
229 	c = keys[i].key;
230 
231 	/*
232 	 * If a span cell is found first, raise a warning and abort the
233 	 * parse.  If a span cell is found and the last layout element
234 	 * isn't a "normal" layout, bail.
235 	 *
236 	 * FIXME: recover from this somehow?
237 	 */
238 
239 	if (TBL_CELL_SPAN == c) {
240 		if (NULL == rp->first) {
241 			mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
242 					ln, *pos, NULL);
243 			return(0);
244 		} else if (rp->last)
245 			switch (rp->last->pos) {
246 			case (TBL_CELL_HORIZ):
247 			case (TBL_CELL_DHORIZ):
248 				mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
249 						ln, *pos, NULL);
250 				return(0);
251 			default:
252 				break;
253 			}
254 	}
255 
256 	/*
257 	 * If a vertical spanner is found, we may not be in the first
258 	 * row.
259 	 */
260 
261 	if (TBL_CELL_DOWN == c && rp == tbl->first_row) {
262 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos, NULL);
263 		return(0);
264 	}
265 
266 	(*pos)++;
267 
268 	/* Disallow adjacent spacers. */
269 
270 	if (vert > 2) {
271 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos - 1, NULL);
272 		return(0);
273 	}
274 
275 	/* Allocate cell then parse its modifiers. */
276 
277 	return(mods(tbl, cell_alloc(tbl, rp, c, vert), ln, p, pos));
278 }
279 
280 
281 static void
282 row(struct tbl_node *tbl, int ln, const char *p, int *pos)
283 {
284 	struct tbl_row	*rp;
285 
286 row:	/*
287 	 * EBNF describing this section:
288 	 *
289 	 * row		::= row_list [:space:]* [.]?[\n]
290 	 * row_list	::= [:space:]* row_elem row_tail
291 	 * row_tail	::= [:space:]*[,] row_list |
292 	 *                  epsilon
293 	 * row_elem	::= [\t\ ]*[:alpha:]+
294 	 */
295 
296 	rp = mandoc_calloc(1, sizeof(struct tbl_row));
297 	if (tbl->last_row)
298 		tbl->last_row->next = rp;
299 	else
300 		tbl->first_row = rp;
301 	tbl->last_row = rp;
302 
303 cell:
304 	while (isspace((unsigned char)p[*pos]))
305 		(*pos)++;
306 
307 	/* Safely exit layout context. */
308 
309 	if ('.' == p[*pos]) {
310 		tbl->part = TBL_PART_DATA;
311 		if (NULL == tbl->first_row)
312 			mandoc_msg(MANDOCERR_TBLNOLAYOUT, tbl->parse,
313 					ln, *pos, NULL);
314 		(*pos)++;
315 		return;
316 	}
317 
318 	/* End (and possibly restart) a row. */
319 
320 	if (',' == p[*pos]) {
321 		(*pos)++;
322 		goto row;
323 	} else if ('\0' == p[*pos])
324 		return;
325 
326 	if ( ! cell(tbl, rp, ln, p, pos))
327 		return;
328 
329 	goto cell;
330 	/* NOTREACHED */
331 }
332 
/*
 * Parse the layout line p, found on input line ln, into the rows and
 * cells of tbl.  Problems are reported by the row parser; this
 * function itself always returns 1.
 */
int
tbl_layout(struct tbl_node *tbl, int ln, const char *p)
{
	int		 offs = 0;

	/* Start at the beginning of the line. */
	row(tbl, ln, p, &offs);

	return(1);
}
344 
345 static struct tbl_cell *
346 cell_alloc(struct tbl_node *tbl, struct tbl_row *rp, enum tbl_cellt pos,
347 		int vert)
348 {
349 	struct tbl_cell	*p, *pp;
350 	struct tbl_head	*h, *hp;
351 
352 	p = mandoc_calloc(1, sizeof(struct tbl_cell));
353 
354 	if (NULL != (pp = rp->last)) {
355 		pp->next = p;
356 		h = pp->head->next;
357 	} else {
358 		rp->first = p;
359 		h = tbl->first_head;
360 	}
361 	rp->last = p;
362 
363 	p->pos = pos;
364 	p->vert = vert;
365 
366 	/* Re-use header. */
367 
368 	if (h) {
369 		p->head = h;
370 		return(p);
371 	}
372 
373 	hp = mandoc_calloc(1, sizeof(struct tbl_head));
374 	hp->ident = tbl->opts.cols++;
375 	hp->vert = vert;
376 
377 	if (tbl->last_head) {
378 		hp->prev = tbl->last_head;
379 		tbl->last_head->next = hp;
380 	} else
381 		tbl->first_head = hp;
382 	tbl->last_head = hp;
383 
384 	p->head = hp;
385 	return(p);
386 }
387