xref: /netbsd-src/external/bsd/mdocml/dist/tbl_data.c (revision 6167eca2d062f3691f8b22e3b8ea212d6dde852a)
1*6167eca2Schristos /*	Id: tbl_data.c,v 1.52 2019/02/09 16:00:39 schwarze Exp  */
2c0d9444aSjoerg /*
348741257Sjoerg  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4*6167eca2Schristos  * Copyright (c) 2011,2015,2017,2018,2019 Ingo Schwarze <schwarze@openbsd.org>
5c0d9444aSjoerg  *
6c0d9444aSjoerg  * Permission to use, copy, modify, and distribute this software for any
7c0d9444aSjoerg  * purpose with or without fee is hereby granted, provided that the above
8c0d9444aSjoerg  * copyright notice and this permission notice appear in all copies.
9c0d9444aSjoerg  *
10c0d9444aSjoerg  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11c0d9444aSjoerg  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12c0d9444aSjoerg  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13c0d9444aSjoerg  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14c0d9444aSjoerg  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15c0d9444aSjoerg  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16c0d9444aSjoerg  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17c0d9444aSjoerg  */
18c0d9444aSjoerg #include "config.h"
195c413d0cSchristos 
205c413d0cSchristos #include <sys/types.h>
21c0d9444aSjoerg 
22c0d9444aSjoerg #include <assert.h>
23c0d9444aSjoerg #include <ctype.h>
24*6167eca2Schristos #include <stdio.h>
25c0d9444aSjoerg #include <stdlib.h>
26c0d9444aSjoerg #include <string.h>
27c0d9444aSjoerg #include <time.h>
28c0d9444aSjoerg 
295c413d0cSchristos #include "mandoc_aux.h"
30*6167eca2Schristos #include "mandoc.h"
31*6167eca2Schristos #include "tbl.h"
32c0d9444aSjoerg #include "libmandoc.h"
33*6167eca2Schristos #include "tbl_int.h"
34c0d9444aSjoerg 
/* Forward declarations of the file-local helpers defined below. */
static	void		 getdata(struct tbl_node *tbl, struct tbl_span *dp,
				int ln, const char *p, int *pos);
static	struct tbl_span	*newspan(struct tbl_node *tbl, int line,
				struct tbl_row *rp);
39c0d9444aSjoerg 
405c413d0cSchristos 
415c413d0cSchristos static void
getdata(struct tbl_node * tbl,struct tbl_span * dp,int ln,const char * p,int * pos)42f60f86a6Schristos getdata(struct tbl_node *tbl, struct tbl_span *dp,
43c0d9444aSjoerg 		int ln, const char *p, int *pos)
44c0d9444aSjoerg {
45*6167eca2Schristos 	struct tbl_dat	*dat, *pdat;
46c0d9444aSjoerg 	struct tbl_cell	*cp;
47*6167eca2Schristos 	struct tbl_span	*pdp;
485c413d0cSchristos 	int		 sv;
49c0d9444aSjoerg 
50*6167eca2Schristos 	/*
51*6167eca2Schristos 	 * Determine the length of the string in the cell
52*6167eca2Schristos 	 * and advance the parse point to the end of the cell.
53*6167eca2Schristos 	 */
54*6167eca2Schristos 
55*6167eca2Schristos 	sv = *pos;
56*6167eca2Schristos 	while (p[*pos] != '\0' && p[*pos] != tbl->opts.tab)
57*6167eca2Schristos 		(*pos)++;
58*6167eca2Schristos 
595c413d0cSchristos 	/* Advance to the next layout cell, skipping spanners. */
60c0d9444aSjoerg 
615c413d0cSchristos 	cp = dp->last == NULL ? dp->layout->first : dp->last->layout->next;
625c413d0cSchristos 	while (cp != NULL && cp->pos == TBL_CELL_SPAN)
63c0d9444aSjoerg 		cp = cp->next;
64c0d9444aSjoerg 
6548741257Sjoerg 	/*
6614e7489eSchristos 	 * If the current layout row is out of cells, allocate
6714e7489eSchristos 	 * a new cell if another row of the table has at least
6814e7489eSchristos 	 * this number of columns, or discard the input if we
6914e7489eSchristos 	 * are beyond the last column of the table as a whole.
7048741257Sjoerg 	 */
7148741257Sjoerg 
725c413d0cSchristos 	if (cp == NULL) {
7314e7489eSchristos 		if (dp->layout->last->col + 1 < dp->opts->cols) {
7414e7489eSchristos 			cp = mandoc_calloc(1, sizeof(*cp));
7514e7489eSchristos 			cp->pos = TBL_CELL_LEFT;
7614e7489eSchristos 			dp->layout->last->next = cp;
7714e7489eSchristos 			cp->col = dp->layout->last->col + 1;
7814e7489eSchristos 			dp->layout->last = cp;
7914e7489eSchristos 		} else {
80*6167eca2Schristos 			mandoc_msg(MANDOCERR_TBLDATA_EXTRA,
81*6167eca2Schristos 			    ln, sv, "%s", p + sv);
82*6167eca2Schristos 			while (p[*pos] != '\0')
8348741257Sjoerg 				(*pos)++;
845c413d0cSchristos 			return;
8548741257Sjoerg 		}
8614e7489eSchristos 	}
8748741257Sjoerg 
88*6167eca2Schristos 	dat = mandoc_malloc(sizeof(*dat));
89c0d9444aSjoerg 	dat->layout = cp;
90*6167eca2Schristos 	dat->next = NULL;
91*6167eca2Schristos 	dat->string = NULL;
92*6167eca2Schristos 	dat->hspans = 0;
93*6167eca2Schristos 	dat->vspans = 0;
94*6167eca2Schristos 	dat->block = 0;
95c0d9444aSjoerg 	dat->pos = TBL_DATA_NONE;
96*6167eca2Schristos 
97*6167eca2Schristos 	/*
98*6167eca2Schristos 	 * Increment the number of vertical spans in a data cell above,
99*6167eca2Schristos 	 * if this cell vertically extends one or more cells above.
100*6167eca2Schristos 	 * The iteration must be done over data rows,
101*6167eca2Schristos 	 * not over layout rows, because one layout row
102*6167eca2Schristos 	 * can be reused for more than one data row.
103*6167eca2Schristos 	 */
104*6167eca2Schristos 
105*6167eca2Schristos 	if (cp->pos == TBL_CELL_DOWN ||
106*6167eca2Schristos 	    (*pos - sv == 2 && p[sv] == '\\' && p[sv + 1] == '^')) {
107*6167eca2Schristos 		pdp = dp;
108*6167eca2Schristos 		while ((pdp = pdp->prev) != NULL) {
109*6167eca2Schristos 			pdat = pdp->first;
110*6167eca2Schristos 			while (pdat != NULL &&
111*6167eca2Schristos 			    pdat->layout->col < dat->layout->col)
112*6167eca2Schristos 				pdat = pdat->next;
113*6167eca2Schristos 			if (pdat == NULL)
114*6167eca2Schristos 				break;
115*6167eca2Schristos 			if (pdat->layout->pos != TBL_CELL_DOWN &&
116*6167eca2Schristos 			    strcmp(pdat->string, "\\^") != 0) {
117*6167eca2Schristos 				pdat->vspans++;
118*6167eca2Schristos 				break;
119*6167eca2Schristos 			}
120*6167eca2Schristos 		}
121*6167eca2Schristos 	}
122*6167eca2Schristos 
123*6167eca2Schristos 	/*
124*6167eca2Schristos 	 * Count the number of horizontal spans to the right of this cell.
125*6167eca2Schristos 	 * This is purely a matter of the layout, independent of the data.
126*6167eca2Schristos 	 */
127*6167eca2Schristos 
1285c413d0cSchristos 	for (cp = cp->next; cp != NULL; cp = cp->next)
1295c413d0cSchristos 		if (cp->pos == TBL_CELL_SPAN)
130*6167eca2Schristos 			dat->hspans++;
13148741257Sjoerg 		else
13248741257Sjoerg 			break;
13348741257Sjoerg 
134*6167eca2Schristos 	/* Append the new data cell to the data row. */
135*6167eca2Schristos 
1365c413d0cSchristos 	if (dp->last == NULL)
1375c413d0cSchristos 		dp->first = dat;
1385c413d0cSchristos 	else
139c0d9444aSjoerg 		dp->last->next = dat;
140c0d9444aSjoerg 	dp->last = dat;
141c0d9444aSjoerg 
142c0d9444aSjoerg 	/*
143c0d9444aSjoerg 	 * Check for a continued-data scope opening.  This consists of a
144c0d9444aSjoerg 	 * trailing `T{' at the end of the line.  Subsequent lines,
145c0d9444aSjoerg 	 * until a standalone `T}', are included in our cell.
146c0d9444aSjoerg 	 */
147c0d9444aSjoerg 
1485c413d0cSchristos 	if (*pos - sv == 2 && p[sv] == 'T' && p[sv + 1] == '{') {
149c0d9444aSjoerg 		tbl->part = TBL_PART_CDATA;
1505c413d0cSchristos 		return;
151c0d9444aSjoerg 	}
152c0d9444aSjoerg 
1535c413d0cSchristos 	dat->string = mandoc_strndup(p + sv, *pos - sv);
154c0d9444aSjoerg 
155*6167eca2Schristos 	if (p[*pos] != '\0')
156c0d9444aSjoerg 		(*pos)++;
157c0d9444aSjoerg 
158c0d9444aSjoerg 	if ( ! strcmp(dat->string, "_"))
159c0d9444aSjoerg 		dat->pos = TBL_DATA_HORIZ;
160c0d9444aSjoerg 	else if ( ! strcmp(dat->string, "="))
161c0d9444aSjoerg 		dat->pos = TBL_DATA_DHORIZ;
162c0d9444aSjoerg 	else if ( ! strcmp(dat->string, "\\_"))
163c0d9444aSjoerg 		dat->pos = TBL_DATA_NHORIZ;
164c0d9444aSjoerg 	else if ( ! strcmp(dat->string, "\\="))
165c0d9444aSjoerg 		dat->pos = TBL_DATA_NDHORIZ;
166c0d9444aSjoerg 	else
167c0d9444aSjoerg 		dat->pos = TBL_DATA_DATA;
168c0d9444aSjoerg 
1695c413d0cSchristos 	if ((dat->layout->pos == TBL_CELL_HORIZ ||
1705c413d0cSchristos 	    dat->layout->pos == TBL_CELL_DHORIZ ||
1715c413d0cSchristos 	    dat->layout->pos == TBL_CELL_DOWN) &&
1725c413d0cSchristos 	    dat->pos == TBL_DATA_DATA && *dat->string != '\0')
1735c413d0cSchristos 		mandoc_msg(MANDOCERR_TBLDATA_SPAN,
174*6167eca2Schristos 		    ln, sv, "%s", dat->string);
175c0d9444aSjoerg }
176c0d9444aSjoerg 
17714e7489eSchristos void
tbl_cdata(struct tbl_node * tbl,int ln,const char * p,int pos)1785c413d0cSchristos tbl_cdata(struct tbl_node *tbl, int ln, const char *p, int pos)
179c0d9444aSjoerg {
180c0d9444aSjoerg 	struct tbl_dat	*dat;
181c0d9444aSjoerg 	size_t		 sz;
182c0d9444aSjoerg 
183c0d9444aSjoerg 	dat = tbl->last_span->last;
184c0d9444aSjoerg 
185c0d9444aSjoerg 	if (p[pos] == 'T' && p[pos + 1] == '}') {
186c0d9444aSjoerg 		pos += 2;
187c0d9444aSjoerg 		if (p[pos] == tbl->opts.tab) {
188c0d9444aSjoerg 			tbl->part = TBL_PART_DATA;
189c0d9444aSjoerg 			pos++;
190f47368cfSchristos 			while (p[pos] != '\0')
1915c413d0cSchristos 				getdata(tbl, tbl->last_span, ln, p, &pos);
19214e7489eSchristos 			return;
1935c413d0cSchristos 		} else if (p[pos] == '\0') {
194c0d9444aSjoerg 			tbl->part = TBL_PART_DATA;
19514e7489eSchristos 			return;
196c0d9444aSjoerg 		}
197c0d9444aSjoerg 
198c0d9444aSjoerg 		/* Fallthrough: T} is part of a word. */
199c0d9444aSjoerg 	}
200c0d9444aSjoerg 
20148741257Sjoerg 	dat->pos = TBL_DATA_DATA;
20214e7489eSchristos 	dat->block = 1;
20348741257Sjoerg 
2045c413d0cSchristos 	if (dat->string != NULL) {
2055c413d0cSchristos 		sz = strlen(p + pos) + strlen(dat->string) + 2;
206c0d9444aSjoerg 		dat->string = mandoc_realloc(dat->string, sz);
2075c413d0cSchristos 		(void)strlcat(dat->string, " ", sz);
2085c413d0cSchristos 		(void)strlcat(dat->string, p + pos, sz);
209c0d9444aSjoerg 	} else
2105c413d0cSchristos 		dat->string = mandoc_strdup(p + pos);
211c0d9444aSjoerg 
2125c413d0cSchristos 	if (dat->layout->pos == TBL_CELL_DOWN)
213*6167eca2Schristos 		mandoc_msg(MANDOCERR_TBLDATA_SPAN,
214*6167eca2Schristos 		    ln, pos, "%s", dat->string);
215c0d9444aSjoerg }
216c0d9444aSjoerg 
21748741257Sjoerg static struct tbl_span *
newspan(struct tbl_node * tbl,int line,struct tbl_row * rp)21848741257Sjoerg newspan(struct tbl_node *tbl, int line, struct tbl_row *rp)
21948741257Sjoerg {
22048741257Sjoerg 	struct tbl_span	*dp;
22148741257Sjoerg 
2225c413d0cSchristos 	dp = mandoc_calloc(1, sizeof(*dp));
22348741257Sjoerg 	dp->line = line;
224603fc4ebSjoerg 	dp->opts = &tbl->opts;
22548741257Sjoerg 	dp->layout = rp;
2265c413d0cSchristos 	dp->prev = tbl->last_span;
22748741257Sjoerg 
2285c413d0cSchristos 	if (dp->prev == NULL) {
2295c413d0cSchristos 		tbl->first_span = dp;
23048741257Sjoerg 		tbl->current_span = NULL;
2315c413d0cSchristos 	} else
2325c413d0cSchristos 		dp->prev->next = dp;
2335c413d0cSchristos 	tbl->last_span = dp;
23448741257Sjoerg 
235f47368cfSchristos 	return dp;
23648741257Sjoerg }
23748741257Sjoerg 
2385c413d0cSchristos void
tbl_data(struct tbl_node * tbl,int ln,const char * p,int pos)2395c413d0cSchristos tbl_data(struct tbl_node *tbl, int ln, const char *p, int pos)
240c0d9444aSjoerg {
241c0d9444aSjoerg 	struct tbl_row	*rp;
24214e7489eSchristos 	struct tbl_cell	*cp;
24314e7489eSchristos 	struct tbl_span	*sp;
244c0d9444aSjoerg 
24514e7489eSchristos 	rp = (sp = tbl->last_span) == NULL ? tbl->first_row :
24614e7489eSchristos 	    sp->pos == TBL_SPAN_DATA && sp->layout->next != NULL ?
24714e7489eSchristos 	    sp->layout->next : sp->layout;
248c0d9444aSjoerg 
24914e7489eSchristos 	assert(rp != NULL);
250c0d9444aSjoerg 
251*6167eca2Schristos 	if (p[1] == '\0') {
252*6167eca2Schristos 		switch (p[0]) {
253*6167eca2Schristos 		case '.':
254*6167eca2Schristos 			/*
255*6167eca2Schristos 			 * Empty request lines must be handled here
256*6167eca2Schristos 			 * and cannot be discarded in roff_parseln()
257*6167eca2Schristos 			 * because in the layout section, they
258*6167eca2Schristos 			 * are significant and end the layout.
259*6167eca2Schristos 			 */
260*6167eca2Schristos 			return;
261*6167eca2Schristos 		case '_':
26214e7489eSchristos 			sp = newspan(tbl, ln, rp);
26314e7489eSchristos 			sp->pos = TBL_SPAN_HORIZ;
2645c413d0cSchristos 			return;
265*6167eca2Schristos 		case '=':
26614e7489eSchristos 			sp = newspan(tbl, ln, rp);
26714e7489eSchristos 			sp->pos = TBL_SPAN_DHORIZ;
2685c413d0cSchristos 			return;
269*6167eca2Schristos 		default:
270*6167eca2Schristos 			break;
271*6167eca2Schristos 		}
272c0d9444aSjoerg 	}
273c0d9444aSjoerg 
27414e7489eSchristos 	/*
27514e7489eSchristos 	 * If the layout row contains nothing but horizontal lines,
27614e7489eSchristos 	 * allocate an empty span for it and assign the current span
27714e7489eSchristos 	 * to the next layout row accepting data.
27814e7489eSchristos 	 */
279c0d9444aSjoerg 
28014e7489eSchristos 	while (rp->next != NULL) {
28114e7489eSchristos 		if (rp->last->col + 1 < tbl->opts.cols)
28214e7489eSchristos 			break;
28314e7489eSchristos 		for (cp = rp->first; cp != NULL; cp = cp->next)
28414e7489eSchristos 			if (cp->pos != TBL_CELL_HORIZ &&
28514e7489eSchristos 			    cp->pos != TBL_CELL_DHORIZ)
28614e7489eSchristos 				break;
28714e7489eSchristos 		if (cp != NULL)
28814e7489eSchristos 			break;
28914e7489eSchristos 		sp = newspan(tbl, ln, rp);
29014e7489eSchristos 		sp->pos = TBL_SPAN_DATA;
29114e7489eSchristos 		rp = rp->next;
29214e7489eSchristos 	}
29314e7489eSchristos 
29414e7489eSchristos 	/* Process a real data row. */
29514e7489eSchristos 
29614e7489eSchristos 	sp = newspan(tbl, ln, rp);
29714e7489eSchristos 	sp->pos = TBL_SPAN_DATA;
2985c413d0cSchristos 	while (p[pos] != '\0')
29914e7489eSchristos 		getdata(tbl, sp, ln, p, &pos);
300c0d9444aSjoerg }
301