xref: /openbsd-src/usr.bin/mandoc/tbl_layout.c (revision 6f05df2d9be0954bec42d51d943d77bd250fb664)
1 /*	$OpenBSD: tbl_layout.c,v 1.17 2014/11/25 05:39:00 schwarze Exp $ */
2 /*
3  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2012, 2014 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <ctype.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <time.h>
24 
25 #include "mandoc.h"
26 #include "mandoc_aux.h"
27 #include "libmandoc.h"
28 #include "libroff.h"
29 
/*
 * Associates one layout key character with the cell type it selects.
 * Instances are initialised positionally, so the member order matters.
 */
struct	tbl_phrase {
	char		 name;	/* key character as written in the layout */
	enum tbl_cellt	 key;	/* cell type chosen by that character */
};
34 
35 /*
36  * FIXME: we can make this parse a lot nicer by, when an error is
37  * encountered in a layout key, bailing to the next key (i.e. to the
38  * next whitespace then continuing).
39  */
40 
41 #define	KEYS_MAX	 11
42 
43 static	const struct tbl_phrase keys[KEYS_MAX] = {
44 	{ 'c',		 TBL_CELL_CENTRE },
45 	{ 'r',		 TBL_CELL_RIGHT },
46 	{ 'l',		 TBL_CELL_LEFT },
47 	{ 'n',		 TBL_CELL_NUMBER },
48 	{ 's',		 TBL_CELL_SPAN },
49 	{ 'a',		 TBL_CELL_LONG },
50 	{ '^',		 TBL_CELL_DOWN },
51 	{ '-',		 TBL_CELL_HORIZ },
52 	{ '_',		 TBL_CELL_HORIZ },
53 	{ '=',		 TBL_CELL_DHORIZ }
54 };
55 
/* Forward declarations of the file-local layout parsing helpers. */
static	int		 mods(struct tbl_node *tbl, struct tbl_cell *cp,
				int ln, const char *p, int *pos);
static	int		 cell(struct tbl_node *tbl, struct tbl_row *rp,
				int ln, const char *p, int *pos);
static	struct tbl_cell *cell_alloc(struct tbl_node *tbl,
				struct tbl_row *rp, enum tbl_cellt pos,
				int vert);
62 
63 
64 static int
65 mods(struct tbl_node *tbl, struct tbl_cell *cp,
66 		int ln, const char *p, int *pos)
67 {
68 	char		 buf[5];
69 	int		 i;
70 
71 	/* Not all types accept modifiers. */
72 
73 	switch (cp->pos) {
74 	case TBL_CELL_DOWN:
75 		/* FALLTHROUGH */
76 	case TBL_CELL_HORIZ:
77 		/* FALLTHROUGH */
78 	case TBL_CELL_DHORIZ:
79 		return(1);
80 	default:
81 		break;
82 	}
83 
84 mod:
85 	/*
86 	 * XXX: since, at least for now, modifiers are non-conflicting
87 	 * (are separable by value, regardless of position), we let
88 	 * modifiers come in any order.  The existing tbl doesn't let
89 	 * this happen.
90 	 */
91 	switch (p[*pos]) {
92 	case '\0':
93 		/* FALLTHROUGH */
94 	case ' ':
95 		/* FALLTHROUGH */
96 	case '\t':
97 		/* FALLTHROUGH */
98 	case ',':
99 		/* FALLTHROUGH */
100 	case '.':
101 		/* FALLTHROUGH */
102 	case '|':
103 		return(1);
104 	default:
105 		break;
106 	}
107 
108 	/* Throw away parenthesised expression. */
109 
110 	if ('(' == p[*pos]) {
111 		(*pos)++;
112 		while (p[*pos] && ')' != p[*pos])
113 			(*pos)++;
114 		if (')' == p[*pos]) {
115 			(*pos)++;
116 			goto mod;
117 		}
118 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
119 		    ln, *pos, NULL);
120 		return(0);
121 	}
122 
123 	/* Parse numerical spacing from modifier string. */
124 
125 	if (isdigit((unsigned char)p[*pos])) {
126 		for (i = 0; i < 4; i++) {
127 			if ( ! isdigit((unsigned char)p[*pos + i]))
128 				break;
129 			buf[i] = p[*pos + i];
130 		}
131 		buf[i] = '\0';
132 
133 		/* No greater than 4 digits. */
134 
135 		if (4 == i) {
136 			mandoc_msg(MANDOCERR_TBLLAYOUT,
137 			    tbl->parse, ln, *pos, NULL);
138 			return(0);
139 		}
140 
141 		*pos += i;
142 		cp->spacing = (size_t)atoi(buf);
143 
144 		goto mod;
145 		/* NOTREACHED */
146 	}
147 
148 	/* TODO: GNU has many more extensions. */
149 
150 	switch (tolower((unsigned char)p[(*pos)++])) {
151 	case 'z':
152 		cp->flags |= TBL_CELL_WIGN;
153 		goto mod;
154 	case 'u':
155 		cp->flags |= TBL_CELL_UP;
156 		goto mod;
157 	case 'e':
158 		cp->flags |= TBL_CELL_EQUAL;
159 		goto mod;
160 	case 't':
161 		cp->flags |= TBL_CELL_TALIGN;
162 		goto mod;
163 	case 'd':
164 		cp->flags |= TBL_CELL_BALIGN;
165 		goto mod;
166 	case 'w':  /* XXX for now, ignore minimal column width */
167 		goto mod;
168 	case 'x':
169 		cp->flags |= TBL_CELL_WMAX;
170 		goto mod;
171 	case 'f':
172 		break;
173 	case 'r':
174 		/* FALLTHROUGH */
175 	case 'b':
176 		/* FALLTHROUGH */
177 	case 'i':
178 		(*pos)--;
179 		break;
180 	default:
181 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
182 		    ln, *pos - 1, NULL);
183 		return(0);
184 	}
185 
186 	switch (tolower((unsigned char)p[(*pos)++])) {
187 	case '3':
188 		/* FALLTHROUGH */
189 	case 'b':
190 		cp->flags |= TBL_CELL_BOLD;
191 		goto mod;
192 	case '2':
193 		/* FALLTHROUGH */
194 	case 'i':
195 		cp->flags |= TBL_CELL_ITALIC;
196 		goto mod;
197 	case '1':
198 		/* FALLTHROUGH */
199 	case 'r':
200 		goto mod;
201 	default:
202 		break;
203 	}
204 	if (isalnum((unsigned char)p[*pos - 1])) {
205 		mandoc_vmsg(MANDOCERR_FT_BAD, tbl->parse,
206 		    ln, *pos - 1, "TS f%c", p[*pos - 1]);
207 		goto mod;
208 	}
209 
210 	mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
211 	    ln, *pos - 1, NULL);
212 	return(0);
213 }
214 
215 static int
216 cell(struct tbl_node *tbl, struct tbl_row *rp,
217 		int ln, const char *p, int *pos)
218 {
219 	int		 vert, i;
220 	enum tbl_cellt	 c;
221 
222 	/* Handle vertical lines. */
223 
224 	for (vert = 0; '|' == p[*pos]; ++*pos)
225 		vert++;
226 	while (' ' == p[*pos])
227 		(*pos)++;
228 
229 	/* Handle trailing vertical lines */
230 
231 	if ('.' == p[*pos] || '\0' == p[*pos]) {
232 		rp->vert = vert;
233 		return(1);
234 	}
235 
236 	/* Parse the column position (`c', `l', `r', ...). */
237 
238 	for (i = 0; i < KEYS_MAX; i++)
239 		if (tolower((unsigned char)p[*pos]) == keys[i].name)
240 			break;
241 
242 	if (KEYS_MAX == i) {
243 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
244 		    ln, *pos, NULL);
245 		return(0);
246 	}
247 
248 	c = keys[i].key;
249 
250 	/*
251 	 * If a span cell is found first, raise a warning and abort the
252 	 * parse.  If a span cell is found and the last layout element
253 	 * isn't a "normal" layout, bail.
254 	 *
255 	 * FIXME: recover from this somehow?
256 	 */
257 
258 	if (TBL_CELL_SPAN == c) {
259 		if (NULL == rp->first) {
260 			mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
261 			    ln, *pos, NULL);
262 			return(0);
263 		} else if (rp->last)
264 			switch (rp->last->pos) {
265 			case TBL_CELL_HORIZ:
266 				/* FALLTHROUGH */
267 			case TBL_CELL_DHORIZ:
268 				mandoc_msg(MANDOCERR_TBLLAYOUT,
269 				    tbl->parse, ln, *pos, NULL);
270 				return(0);
271 			default:
272 				break;
273 			}
274 	}
275 
276 	/*
277 	 * If a vertical spanner is found, we may not be in the first
278 	 * row.
279 	 */
280 
281 	if (TBL_CELL_DOWN == c && rp == tbl->first_row) {
282 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos, NULL);
283 		return(0);
284 	}
285 
286 	(*pos)++;
287 
288 	/* Disallow adjacent spacers. */
289 
290 	if (vert > 2) {
291 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos - 1, NULL);
292 		return(0);
293 	}
294 
295 	/* Allocate cell then parse its modifiers. */
296 
297 	return(mods(tbl, cell_alloc(tbl, rp, c, vert), ln, p, pos));
298 }
299 
300 int
301 tbl_layout(struct tbl_node *tbl, int ln, const char *p)
302 {
303 	struct tbl_row	*rp;
304 	int		 pos;
305 
306 	pos = 0;
307 	rp = NULL;
308 
309 	for (;;) {
310 		/* Skip whitespace before and after each cell. */
311 
312 		while (isspace((unsigned char)p[pos]))
313 			pos++;
314 
315 		switch (p[pos]) {
316 		case ',':  /* Next row on this input line. */
317 			pos++;
318 			rp = NULL;
319 			continue;
320 		case '\0':  /* Next row on next input line. */
321 			return(1);
322 		case '.':  /* End of layout. */
323 			pos++;
324 			tbl->part = TBL_PART_DATA;
325 			if (tbl->first_row != NULL)
326 				return(1);
327 			mandoc_msg(MANDOCERR_TBLNOLAYOUT,
328 			    tbl->parse, ln, pos, NULL);
329 			rp = mandoc_calloc(1, sizeof(*rp));
330 			cell_alloc(tbl, rp, TBL_CELL_LEFT, 0);
331 			tbl->first_row = tbl->last_row = rp;
332 			return(1);
333 		default:  /* Cell. */
334 			break;
335 		}
336 
337 		if (rp == NULL) {  /* First cell on this line. */
338 			rp = mandoc_calloc(1, sizeof(*rp));
339 			if (tbl->last_row)
340 				tbl->last_row->next = rp;
341 			else
342 				tbl->first_row = rp;
343 			tbl->last_row = rp;
344 		}
345 		if ( ! cell(tbl, rp, ln, p, &pos))
346 			return(1);
347 	}
348 }
349 
350 static struct tbl_cell *
351 cell_alloc(struct tbl_node *tbl, struct tbl_row *rp, enum tbl_cellt pos,
352 		int vert)
353 {
354 	struct tbl_cell	*p, *pp;
355 	struct tbl_head	*h, *hp;
356 
357 	p = mandoc_calloc(1, sizeof(struct tbl_cell));
358 
359 	if (NULL != (pp = rp->last)) {
360 		pp->next = p;
361 		h = pp->head->next;
362 	} else {
363 		rp->first = p;
364 		h = tbl->first_head;
365 	}
366 	rp->last = p;
367 
368 	p->pos = pos;
369 	p->vert = vert;
370 
371 	/* Re-use header. */
372 
373 	if (h) {
374 		p->head = h;
375 		return(p);
376 	}
377 
378 	hp = mandoc_calloc(1, sizeof(struct tbl_head));
379 	hp->ident = tbl->opts.cols++;
380 	hp->vert = vert;
381 
382 	if (tbl->last_head) {
383 		hp->prev = tbl->last_head;
384 		tbl->last_head->next = hp;
385 	} else
386 		tbl->first_head = hp;
387 	tbl->last_head = hp;
388 
389 	p->head = hp;
390 	return(p);
391 }
392