xref: /netbsd-src/external/bsd/mdocml/dist/tbl_layout.c (revision f32144615845cc98528c78d4f24930c3b8fe6a8f)
1 /*	$Vendor-Id: tbl_layout.c,v 1.18 2011/04/04 23:04:38 kristaps Exp $ */
2 /*
3  * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20 
21 #include <assert.h>
22 #include <ctype.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <time.h>
26 
27 #include "mandoc.h"
28 #include "libmandoc.h"
29 #include "libroff.h"
30 
31 struct	tbl_phrase {
32 	char		 name;
33 	enum tbl_cellt	 key;
34 };
35 
36 /*
37  * FIXME: we can make this parse a lot nicer by, when an error is
38  * encountered in a layout key, bailing to the next key (i.e. to the
39  * next whitespace then continuing).
40  */
41 
42 #define	KEYS_MAX	 11
43 
44 static	const struct tbl_phrase keys[KEYS_MAX] = {
45 	{ 'c',		 TBL_CELL_CENTRE },
46 	{ 'r',		 TBL_CELL_RIGHT },
47 	{ 'l',		 TBL_CELL_LEFT },
48 	{ 'n',		 TBL_CELL_NUMBER },
49 	{ 's',		 TBL_CELL_SPAN },
50 	{ 'a',		 TBL_CELL_LONG },
51 	{ '^',		 TBL_CELL_DOWN },
52 	{ '-',		 TBL_CELL_HORIZ },
53 	{ '_',		 TBL_CELL_HORIZ },
54 	{ '=',		 TBL_CELL_DHORIZ },
55 	{ '|',		 TBL_CELL_VERT }
56 };
57 
/* Forward declarations of the file-local layout parsing helpers. */
static int mods(struct tbl_node *tbl, struct tbl_cell *cp,
		int ln, const char *p, int *pos);
static int cell(struct tbl_node *tbl, struct tbl_row *rp,
		int ln, const char *p, int *pos);
static void row(struct tbl_node *tbl, int ln, const char *p, int *pos);
static struct tbl_cell *cell_alloc(struct tbl_node *tbl,
		struct tbl_row *rp, enum tbl_cellt pos);
static void head_adjust(const struct tbl_cell *cellp,
		struct tbl_head *head);
67 
68 static int
69 mods(struct tbl_node *tbl, struct tbl_cell *cp,
70 		int ln, const char *p, int *pos)
71 {
72 	char		 buf[5];
73 	int		 i;
74 
75 mod:
76 	/*
77 	 * XXX: since, at least for now, modifiers are non-conflicting
78 	 * (are separable by value, regardless of position), we let
79 	 * modifiers come in any order.  The existing tbl doesn't let
80 	 * this happen.
81 	 */
82 	switch (p[*pos]) {
83 	case ('\0'):
84 		/* FALLTHROUGH */
85 	case (' '):
86 		/* FALLTHROUGH */
87 	case ('\t'):
88 		/* FALLTHROUGH */
89 	case (','):
90 		/* FALLTHROUGH */
91 	case ('.'):
92 		return(1);
93 	default:
94 		break;
95 	}
96 
97 	/* Throw away parenthesised expression. */
98 
99 	if ('(' == p[*pos]) {
100 		(*pos)++;
101 		while (p[*pos] && ')' != p[*pos])
102 			(*pos)++;
103 		if (')' == p[*pos]) {
104 			(*pos)++;
105 			goto mod;
106 		}
107 		mandoc_msg(MANDOCERR_TBLLAYOUT,
108 				tbl->parse, ln, *pos, NULL);
109 		return(0);
110 	}
111 
112 	/* Parse numerical spacing from modifier string. */
113 
114 	if (isdigit((unsigned char)p[*pos])) {
115 		for (i = 0; i < 4; i++) {
116 			if ( ! isdigit((unsigned char)p[*pos + i]))
117 				break;
118 			buf[i] = p[*pos + i];
119 		}
120 		buf[i] = '\0';
121 
122 		/* No greater than 4 digits. */
123 
124 		if (4 == i) {
125 			mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
126 					ln, *pos, NULL);
127 			return(0);
128 		}
129 
130 		*pos += i;
131 		cp->spacing = (size_t)atoi(buf);
132 
133 		goto mod;
134 		/* NOTREACHED */
135 	}
136 
137 	/* TODO: GNU has many more extensions. */
138 
139 	switch (tolower((unsigned char)p[(*pos)++])) {
140 	case ('z'):
141 		cp->flags |= TBL_CELL_WIGN;
142 		goto mod;
143 	case ('u'):
144 		cp->flags |= TBL_CELL_UP;
145 		goto mod;
146 	case ('e'):
147 		cp->flags |= TBL_CELL_EQUAL;
148 		goto mod;
149 	case ('t'):
150 		cp->flags |= TBL_CELL_TALIGN;
151 		goto mod;
152 	case ('d'):
153 		cp->flags |= TBL_CELL_BALIGN;
154 		goto mod;
155 	case ('w'):  /* XXX for now, ignore minimal column width */
156 		goto mod;
157 	case ('f'):
158 		break;
159 	case ('b'):
160 		/* FALLTHROUGH */
161 	case ('i'):
162 		(*pos)--;
163 		break;
164 	default:
165 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
166 				ln, *pos - 1, NULL);
167 		return(0);
168 	}
169 
170 	switch (tolower((unsigned char)p[(*pos)++])) {
171 	case ('b'):
172 		cp->flags |= TBL_CELL_BOLD;
173 		goto mod;
174 	case ('i'):
175 		cp->flags |= TBL_CELL_ITALIC;
176 		goto mod;
177 	default:
178 		break;
179 	}
180 
181 	mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
182 			ln, *pos - 1, NULL);
183 	return(0);
184 }
185 
186 static int
187 cell(struct tbl_node *tbl, struct tbl_row *rp,
188 		int ln, const char *p, int *pos)
189 {
190 	int		 i;
191 	enum tbl_cellt	 c;
192 
193 	/* Parse the column position (`r', `R', `|', ...). */
194 
195 	for (i = 0; i < KEYS_MAX; i++)
196 		if (tolower((unsigned char)p[*pos]) == keys[i].name)
197 			break;
198 
199 	if (KEYS_MAX == i) {
200 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
201 				ln, *pos, NULL);
202 		return(0);
203 	}
204 
205 	c = keys[i].key;
206 
207 	/*
208 	 * If a span cell is found first, raise a warning and abort the
209 	 * parse.  If a span cell is found and the last layout element
210 	 * isn't a "normal" layout, bail.
211 	 *
212 	 * FIXME: recover from this somehow?
213 	 */
214 
215 	if (TBL_CELL_SPAN == c) {
216 		if (NULL == rp->first) {
217 			mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
218 					ln, *pos, NULL);
219 			return(0);
220 		} else if (rp->last)
221 			switch (rp->last->pos) {
222 			case (TBL_CELL_VERT):
223 			case (TBL_CELL_DVERT):
224 			case (TBL_CELL_HORIZ):
225 			case (TBL_CELL_DHORIZ):
226 				mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
227 						ln, *pos, NULL);
228 				return(0);
229 			default:
230 				break;
231 			}
232 	}
233 
234 	/*
235 	 * If a vertical spanner is found, we may not be in the first
236 	 * row.
237 	 */
238 
239 	if (TBL_CELL_DOWN == c && rp == tbl->first_row) {
240 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos, NULL);
241 		return(0);
242 	}
243 
244 	(*pos)++;
245 
246 	/* Extra check for the double-vertical. */
247 
248 	if (TBL_CELL_VERT == c && '|' == p[*pos]) {
249 		(*pos)++;
250 		c = TBL_CELL_DVERT;
251 	}
252 
253 	/* Disallow adjacent spacers. */
254 
255 	if (rp->last && (TBL_CELL_VERT == c || TBL_CELL_DVERT == c) &&
256 			(TBL_CELL_VERT == rp->last->pos ||
257 			 TBL_CELL_DVERT == rp->last->pos)) {
258 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos - 1, NULL);
259 		return(0);
260 	}
261 
262 	/* Allocate cell then parse its modifiers. */
263 
264 	return(mods(tbl, cell_alloc(tbl, rp, c), ln, p, pos));
265 }
266 
267 
268 static void
269 row(struct tbl_node *tbl, int ln, const char *p, int *pos)
270 {
271 	struct tbl_row	*rp;
272 
273 row:	/*
274 	 * EBNF describing this section:
275 	 *
276 	 * row		::= row_list [:space:]* [.]?[\n]
277 	 * row_list	::= [:space:]* row_elem row_tail
278 	 * row_tail	::= [:space:]*[,] row_list |
279 	 *                  epsilon
280 	 * row_elem	::= [\t\ ]*[:alpha:]+
281 	 */
282 
283 	rp = mandoc_calloc(1, sizeof(struct tbl_row));
284 	if (tbl->last_row) {
285 		tbl->last_row->next = rp;
286 		tbl->last_row = rp;
287 	} else
288 		tbl->last_row = tbl->first_row = rp;
289 
290 cell:
291 	while (isspace((unsigned char)p[*pos]))
292 		(*pos)++;
293 
294 	/* Safely exit layout context. */
295 
296 	if ('.' == p[*pos]) {
297 		tbl->part = TBL_PART_DATA;
298 		if (NULL == tbl->first_row)
299 			mandoc_msg(MANDOCERR_TBLNOLAYOUT, tbl->parse,
300 					ln, *pos, NULL);
301 		(*pos)++;
302 		return;
303 	}
304 
305 	/* End (and possibly restart) a row. */
306 
307 	if (',' == p[*pos]) {
308 		(*pos)++;
309 		goto row;
310 	} else if ('\0' == p[*pos])
311 		return;
312 
313 	if ( ! cell(tbl, rp, ln, p, pos))
314 		return;
315 
316 	goto cell;
317 	/* NOTREACHED */
318 }
319 
/*
 * Parse one line `p' of table layout, found on input line `ln', into
 * `tbl'.  Problems are reported by the helpers; this function itself
 * always returns 1.
 */
int
tbl_layout(struct tbl_node *tbl, int ln, const char *p)
{
	int		 offs = 0;

	row(tbl, ln, p, &offs);

	/* Always succeed. */
	return(1);
}
331 
332 static struct tbl_cell *
333 cell_alloc(struct tbl_node *tbl, struct tbl_row *rp, enum tbl_cellt pos)
334 {
335 	struct tbl_cell	*p, *pp;
336 	struct tbl_head	*h, *hp;
337 
338 	p = mandoc_calloc(1, sizeof(struct tbl_cell));
339 
340 	if (NULL != (pp = rp->last)) {
341 		rp->last->next = p;
342 		rp->last = p;
343 	} else
344 		rp->last = rp->first = p;
345 
346 	p->pos = pos;
347 
348 	/*
349 	 * This is a little bit complicated.  Here we determine the
350 	 * header the corresponds to a cell.  We add headers dynamically
351 	 * when need be or re-use them, otherwise.  As an example, given
352 	 * the following:
353 	 *
354 	 * 	1  c || l
355 	 * 	2  | c | l
356 	 * 	3  l l
357 	 * 	3  || c | l |.
358 	 *
359 	 * We first add the new headers (as there are none) in (1); then
360 	 * in (2) we insert the first spanner (as it doesn't match up
361 	 * with the header); then we re-use the prior data headers,
362 	 * skipping over the spanners; then we re-use everything and add
363 	 * a last spanner.  Note that VERT headers are made into DVERT
364 	 * ones.
365 	 */
366 
367 	h = pp ? pp->head->next : tbl->first_head;
368 
369 	if (h) {
370 		/* Re-use data header. */
371 		if (TBL_HEAD_DATA == h->pos &&
372 				(TBL_CELL_VERT != p->pos &&
373 				 TBL_CELL_DVERT != p->pos)) {
374 			p->head = h;
375 			return(p);
376 		}
377 
378 		/* Re-use spanner header. */
379 		if (TBL_HEAD_DATA != h->pos &&
380 				(TBL_CELL_VERT == p->pos ||
381 				 TBL_CELL_DVERT == p->pos)) {
382 			head_adjust(p, h);
383 			p->head = h;
384 			return(p);
385 		}
386 
387 		/* Right-shift headers with a new spanner. */
388 		if (TBL_HEAD_DATA == h->pos &&
389 				(TBL_CELL_VERT == p->pos ||
390 				 TBL_CELL_DVERT == p->pos)) {
391 			hp = mandoc_calloc(1, sizeof(struct tbl_head));
392 			hp->ident = tbl->opts.cols++;
393 			hp->prev = h->prev;
394 			if (h->prev)
395 				h->prev->next = hp;
396 			if (h == tbl->first_head)
397 				tbl->first_head = hp;
398 			h->prev = hp;
399 			hp->next = h;
400 			head_adjust(p, hp);
401 			p->head = hp;
402 			return(p);
403 		}
404 
405 		if (NULL != (h = h->next)) {
406 			head_adjust(p, h);
407 			p->head = h;
408 			return(p);
409 		}
410 
411 		/* Fall through to default case... */
412 	}
413 
414 	hp = mandoc_calloc(1, sizeof(struct tbl_head));
415 	hp->ident = tbl->opts.cols++;
416 
417 	if (tbl->last_head) {
418 		hp->prev = tbl->last_head;
419 		tbl->last_head->next = hp;
420 		tbl->last_head = hp;
421 	} else
422 		tbl->last_head = tbl->first_head = hp;
423 
424 	head_adjust(p, hp);
425 	p->head = hp;
426 	return(p);
427 }
428 
429 static void
430 head_adjust(const struct tbl_cell *cellp, struct tbl_head *head)
431 {
432 	if (TBL_CELL_VERT != cellp->pos &&
433 			TBL_CELL_DVERT != cellp->pos) {
434 		head->pos = TBL_HEAD_DATA;
435 		return;
436 	}
437 
438 	if (TBL_CELL_VERT == cellp->pos)
439 		if (TBL_HEAD_DVERT != head->pos)
440 			head->pos = TBL_HEAD_VERT;
441 
442 	if (TBL_CELL_DVERT == cellp->pos)
443 		head->pos = TBL_HEAD_DVERT;
444 }
445 
446