xref: /openbsd-src/usr.bin/mandoc/tbl_data.c (revision 2fb659f9459814da342399527a29c22924e4d32e)
/*	$OpenBSD: tbl_data.c,v 1.46 2021/09/10 13:23:44 schwarze Exp $ */
/*
 * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2011,2015,2017-2019,2021 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
1830997c59Sschwarze #include <sys/types.h>
1930997c59Sschwarze 
20393cb51eSschwarze #include <assert.h>
21393cb51eSschwarze #include <ctype.h>
22ebc37634Sschwarze #include <stdint.h>
23e501e731Sschwarze #include <stdio.h>
24393cb51eSschwarze #include <stdlib.h>
25393cb51eSschwarze #include <string.h>
262791bd1cSschwarze #include <time.h>
27393cb51eSschwarze 
284f4f7972Sschwarze #include "mandoc_aux.h"
29fae2491eSschwarze #include "mandoc.h"
30fae2491eSschwarze #include "tbl.h"
312791bd1cSschwarze #include "libmandoc.h"
32fb382a01Sschwarze #include "tbl_int.h"
33393cb51eSschwarze 
/* Forward declarations of the file-local helpers defined below. */
static	void		 getdata(struct tbl_node *tbl, struct tbl_span *dp,
				int ln, const char *p, int *pos);
static	struct tbl_span	*newspan(struct tbl_node *tbl, int line,
				struct tbl_row *rp);
38393cb51eSschwarze 
3949aff9f8Sschwarze 
4017e37242Sschwarze static void
getdata(struct tbl_node * tbl,struct tbl_span * dp,int ln,const char * p,int * pos)416ff6a9aaSschwarze getdata(struct tbl_node *tbl, struct tbl_span *dp,
422791bd1cSschwarze 		int ln, const char *p, int *pos)
43393cb51eSschwarze {
44a93944b2Sschwarze 	struct tbl_dat	*dat, *pdat;
452791bd1cSschwarze 	struct tbl_cell	*cp;
46a93944b2Sschwarze 	struct tbl_span	*pdp;
471a285ef1Sschwarze 	const char	*ccp;
48fbc27a2fSschwarze 	int		 startpos, endpos;
49393cb51eSschwarze 
507a5a8f14Sschwarze 	/*
517a5a8f14Sschwarze 	 * Determine the length of the string in the cell
527a5a8f14Sschwarze 	 * and advance the parse point to the end of the cell.
537a5a8f14Sschwarze 	 */
547a5a8f14Sschwarze 
55fbc27a2fSschwarze 	startpos = *pos;
56fbc27a2fSschwarze 	ccp = p + startpos;
571a285ef1Sschwarze 	while (*ccp != '\0' && *ccp != tbl->opts.tab)
581a285ef1Sschwarze 		if (*ccp++ == '\\')
591a285ef1Sschwarze 			mandoc_escape(&ccp, NULL, NULL);
601a285ef1Sschwarze 	*pos = ccp - p;
617a5a8f14Sschwarze 
625f6d1ba3Sschwarze 	/* Advance to the next layout cell, skipping spanners. */
635f6d1ba3Sschwarze 
6421da0636Sschwarze 	cp = dp->last == NULL ? dp->layout->first : dp->last->layout->next;
6521da0636Sschwarze 	while (cp != NULL && cp->pos == TBL_CELL_SPAN)
662791bd1cSschwarze 		cp = cp->next;
672791bd1cSschwarze 
688351ebcfSschwarze 	/*
69eaa4dcb4Sschwarze 	 * If the current layout row is out of cells, allocate
70eaa4dcb4Sschwarze 	 * a new cell if another row of the table has at least
71eaa4dcb4Sschwarze 	 * this number of columns, or discard the input if we
72eaa4dcb4Sschwarze 	 * are beyond the last column of the table as a whole.
738351ebcfSschwarze 	 */
748351ebcfSschwarze 
7521da0636Sschwarze 	if (cp == NULL) {
76eaa4dcb4Sschwarze 		if (dp->layout->last->col + 1 < dp->opts->cols) {
77eaa4dcb4Sschwarze 			cp = mandoc_calloc(1, sizeof(*cp));
78eaa4dcb4Sschwarze 			cp->pos = TBL_CELL_LEFT;
797d063611Sschwarze 			cp->font = ESCAPE_FONTROMAN;
80ebc37634Sschwarze 			cp->spacing = SIZE_MAX;
81eaa4dcb4Sschwarze 			dp->layout->last->next = cp;
82eaa4dcb4Sschwarze 			cp->col = dp->layout->last->col + 1;
83eaa4dcb4Sschwarze 			dp->layout->last = cp;
84eaa4dcb4Sschwarze 		} else {
85a5a5f808Sschwarze 			mandoc_msg(MANDOCERR_TBLDATA_EXTRA,
86fbc27a2fSschwarze 			    ln, startpos, "%s", p + startpos);
877a5a8f14Sschwarze 			while (p[*pos] != '\0')
888351ebcfSschwarze 				(*pos)++;
8917e37242Sschwarze 			return;
908351ebcfSschwarze 		}
91eaa4dcb4Sschwarze 	}
928351ebcfSschwarze 
93a93944b2Sschwarze 	dat = mandoc_malloc(sizeof(*dat));
942791bd1cSschwarze 	dat->layout = cp;
95a93944b2Sschwarze 	dat->next = NULL;
96a93944b2Sschwarze 	dat->string = NULL;
97a93944b2Sschwarze 	dat->hspans = 0;
98a93944b2Sschwarze 	dat->vspans = 0;
99a93944b2Sschwarze 	dat->block = 0;
1002791bd1cSschwarze 	dat->pos = TBL_DATA_NONE;
101a93944b2Sschwarze 
102a93944b2Sschwarze 	/*
103a93944b2Sschwarze 	 * Increment the number of vertical spans in a data cell above,
104a93944b2Sschwarze 	 * if this cell vertically extends one or more cells above.
105a93944b2Sschwarze 	 * The iteration must be done over data rows,
106a93944b2Sschwarze 	 * not over layout rows, because one layout row
107a93944b2Sschwarze 	 * can be reused for more than one data row.
108a93944b2Sschwarze 	 */
109a93944b2Sschwarze 
1107a5a8f14Sschwarze 	if (cp->pos == TBL_CELL_DOWN ||
111fbc27a2fSschwarze 	    (*pos - startpos == 2 &&
112fbc27a2fSschwarze 	     p[startpos] == '\\' && p[startpos + 1] == '^')) {
113a93944b2Sschwarze 		pdp = dp;
114a93944b2Sschwarze 		while ((pdp = pdp->prev) != NULL) {
115a93944b2Sschwarze 			pdat = pdp->first;
116a93944b2Sschwarze 			while (pdat != NULL &&
117a93944b2Sschwarze 			    pdat->layout->col < dat->layout->col)
118a93944b2Sschwarze 				pdat = pdat->next;
119a93944b2Sschwarze 			if (pdat == NULL)
120a93944b2Sschwarze 				break;
1217a5a8f14Sschwarze 			if (pdat->layout->pos != TBL_CELL_DOWN &&
1227a5a8f14Sschwarze 			    strcmp(pdat->string, "\\^") != 0) {
123a93944b2Sschwarze 				pdat->vspans++;
124a93944b2Sschwarze 				break;
125a93944b2Sschwarze 			}
126a93944b2Sschwarze 		}
127a93944b2Sschwarze 	}
128a93944b2Sschwarze 
129a93944b2Sschwarze 	/*
130a93944b2Sschwarze 	 * Count the number of horizontal spans to the right of this cell.
131a93944b2Sschwarze 	 * This is purely a matter of the layout, independent of the data.
132a93944b2Sschwarze 	 */
133a93944b2Sschwarze 
13421da0636Sschwarze 	for (cp = cp->next; cp != NULL; cp = cp->next)
13521da0636Sschwarze 		if (cp->pos == TBL_CELL_SPAN)
136a93944b2Sschwarze 			dat->hspans++;
1378351ebcfSschwarze 		else
1388351ebcfSschwarze 			break;
1398351ebcfSschwarze 
140a93944b2Sschwarze 	/* Append the new data cell to the data row. */
141a93944b2Sschwarze 
14221da0636Sschwarze 	if (dp->last == NULL)
14321da0636Sschwarze 		dp->first = dat;
14421da0636Sschwarze 	else
1452791bd1cSschwarze 		dp->last->next = dat;
1462791bd1cSschwarze 	dp->last = dat;
1472791bd1cSschwarze 
148*2fb659f9Sschwarze 	/* Strip leading and trailing spaces, if requested. */
149393cb51eSschwarze 
150fbc27a2fSschwarze 	endpos = *pos;
151fbc27a2fSschwarze 	if (dp->opts->opts & TBL_OPT_NOSPACE) {
152fbc27a2fSschwarze 		while (p[startpos] == ' ')
153fbc27a2fSschwarze 			startpos++;
154fbc27a2fSschwarze 		while (endpos > startpos && p[endpos - 1] == ' ')
155fbc27a2fSschwarze 			endpos--;
156fbc27a2fSschwarze 	}
157*2fb659f9Sschwarze 
158*2fb659f9Sschwarze 	/*
159*2fb659f9Sschwarze 	 * Check for a continued-data scope opening.  This consists of a
160*2fb659f9Sschwarze 	 * trailing `T{' at the end of the line.  Subsequent lines,
161*2fb659f9Sschwarze 	 * until a standalone `T}', are included in our cell.
162*2fb659f9Sschwarze 	 */
163*2fb659f9Sschwarze 
164*2fb659f9Sschwarze 	if (endpos - startpos == 2 &&
165*2fb659f9Sschwarze 	    p[startpos] == 'T' && p[startpos + 1] == '{') {
166*2fb659f9Sschwarze 		tbl->part = TBL_PART_CDATA;
167*2fb659f9Sschwarze 		return;
168*2fb659f9Sschwarze 	}
169*2fb659f9Sschwarze 
170fbc27a2fSschwarze 	dat->string = mandoc_strndup(p + startpos, endpos - startpos);
171393cb51eSschwarze 
1727a5a8f14Sschwarze 	if (p[*pos] != '\0')
1732791bd1cSschwarze 		(*pos)++;
174393cb51eSschwarze 
175393cb51eSschwarze 	if ( ! strcmp(dat->string, "_"))
1762791bd1cSschwarze 		dat->pos = TBL_DATA_HORIZ;
177393cb51eSschwarze 	else if ( ! strcmp(dat->string, "="))
1782791bd1cSschwarze 		dat->pos = TBL_DATA_DHORIZ;
179393cb51eSschwarze 	else if ( ! strcmp(dat->string, "\\_"))
1802791bd1cSschwarze 		dat->pos = TBL_DATA_NHORIZ;
181393cb51eSschwarze 	else if ( ! strcmp(dat->string, "\\="))
1822791bd1cSschwarze 		dat->pos = TBL_DATA_NDHORIZ;
183393cb51eSschwarze 	else
1842791bd1cSschwarze 		dat->pos = TBL_DATA_DATA;
1852791bd1cSschwarze 
18621da0636Sschwarze 	if ((dat->layout->pos == TBL_CELL_HORIZ ||
18721da0636Sschwarze 	    dat->layout->pos == TBL_CELL_DHORIZ ||
18821da0636Sschwarze 	    dat->layout->pos == TBL_CELL_DOWN) &&
18921da0636Sschwarze 	    dat->pos == TBL_DATA_DATA && *dat->string != '\0')
19092ff8da6Sschwarze 		mandoc_msg(MANDOCERR_TBLDATA_SPAN,
191fbc27a2fSschwarze 		    ln, startpos, "%s", dat->string);
192393cb51eSschwarze }
193393cb51eSschwarze 
194d93f8561Sschwarze void
tbl_cdata(struct tbl_node * tbl,int ln,const char * p,int pos)19539aede77Sschwarze tbl_cdata(struct tbl_node *tbl, int ln, const char *p, int pos)
1962791bd1cSschwarze {
1972791bd1cSschwarze 	struct tbl_dat	*dat;
1982791bd1cSschwarze 	size_t		 sz;
199ec04407bSschwarze 
200ec04407bSschwarze 	dat = tbl->last_span->last;
201ec04407bSschwarze 
202ec04407bSschwarze 	if (p[pos] == 'T' && p[pos + 1] == '}') {
203ec04407bSschwarze 		pos += 2;
20486dcadabSschwarze 		if (tbl->opts.opts & TBL_OPT_NOSPACE)
20586dcadabSschwarze 			while (p[pos] == ' ')
20686dcadabSschwarze 				pos++;
207ec04407bSschwarze 		if (p[pos] == tbl->opts.tab) {
208ec04407bSschwarze 			tbl->part = TBL_PART_DATA;
209ec04407bSschwarze 			pos++;
2103569c652Sschwarze 			while (p[pos] != '\0')
21117e37242Sschwarze 				getdata(tbl, tbl->last_span, ln, p, &pos);
212d93f8561Sschwarze 			return;
21321da0636Sschwarze 		} else if (p[pos] == '\0') {
2142791bd1cSschwarze 			tbl->part = TBL_PART_DATA;
215d93f8561Sschwarze 			return;
2162791bd1cSschwarze 		}
2172791bd1cSschwarze 
218ec04407bSschwarze 		/* Fallthrough: T} is part of a word. */
219ec04407bSschwarze 	}
2202791bd1cSschwarze 
2218351ebcfSschwarze 	dat->pos = TBL_DATA_DATA;
2222c3e66c4Sschwarze 	dat->block = 1;
2238351ebcfSschwarze 
22421da0636Sschwarze 	if (dat->string != NULL) {
22592ff8da6Sschwarze 		sz = strlen(p + pos) + strlen(dat->string) + 2;
2262791bd1cSschwarze 		dat->string = mandoc_realloc(dat->string, sz);
2270b2f1307Sschwarze 		(void)strlcat(dat->string, " ", sz);
22892ff8da6Sschwarze 		(void)strlcat(dat->string, p + pos, sz);
2292791bd1cSschwarze 	} else
23092ff8da6Sschwarze 		dat->string = mandoc_strdup(p + pos);
2312791bd1cSschwarze 
23221da0636Sschwarze 	if (dat->layout->pos == TBL_CELL_DOWN)
233a5a5f808Sschwarze 		mandoc_msg(MANDOCERR_TBLDATA_SPAN,
234a5a5f808Sschwarze 		    ln, pos, "%s", dat->string);
2352791bd1cSschwarze }
236393cb51eSschwarze 
237a7f5ddf9Sschwarze static struct tbl_span *
newspan(struct tbl_node * tbl,int line,struct tbl_row * rp)2382733b5d3Sschwarze newspan(struct tbl_node *tbl, int line, struct tbl_row *rp)
239a7f5ddf9Sschwarze {
240a7f5ddf9Sschwarze 	struct tbl_span	*dp;
241a7f5ddf9Sschwarze 
24221da0636Sschwarze 	dp = mandoc_calloc(1, sizeof(*dp));
2432733b5d3Sschwarze 	dp->line = line;
244c7497e73Sschwarze 	dp->opts = &tbl->opts;
245a7f5ddf9Sschwarze 	dp->layout = rp;
246fd9b947eSschwarze 	dp->prev = tbl->last_span;
247a7f5ddf9Sschwarze 
248fd9b947eSschwarze 	if (dp->prev == NULL) {
249fd9b947eSschwarze 		tbl->first_span = dp;
2507374e4feSschwarze 		tbl->current_span = NULL;
251fd9b947eSschwarze 	} else
252fd9b947eSschwarze 		dp->prev->next = dp;
253fd9b947eSschwarze 	tbl->last_span = dp;
254a7f5ddf9Sschwarze 
255526e306bSschwarze 	return dp;
256a7f5ddf9Sschwarze }
257a7f5ddf9Sschwarze 
25817e37242Sschwarze void
tbl_data(struct tbl_node * tbl,int ln,const char * p,int pos)25939aede77Sschwarze tbl_data(struct tbl_node *tbl, int ln, const char *p, int pos)
260393cb51eSschwarze {
2612791bd1cSschwarze 	struct tbl_row	*rp;
262eaa4dcb4Sschwarze 	struct tbl_cell	*cp;
263acb996efSschwarze 	struct tbl_span	*sp;
264393cb51eSschwarze 
26561deec73Sschwarze 	for (sp = tbl->last_span; sp != NULL; sp = sp->prev)
26661deec73Sschwarze 		if (sp->pos == TBL_SPAN_DATA)
26761deec73Sschwarze 			break;
26861deec73Sschwarze 	rp = sp == NULL ? tbl->first_row :
26961deec73Sschwarze 	    sp->layout->next == NULL ? sp->layout : sp->layout->next;
270eaa4dcb4Sschwarze 	assert(rp != NULL);
2718351ebcfSschwarze 
272ef2f4505Sschwarze 	if (p[1] == '\0') {
273ef2f4505Sschwarze 		switch (p[0]) {
274ef2f4505Sschwarze 		case '.':
275ef2f4505Sschwarze 			/*
276ef2f4505Sschwarze 			 * Empty request lines must be handled here
277ef2f4505Sschwarze 			 * and cannot be discarded in roff_parseln()
278ef2f4505Sschwarze 			 * because in the layout section, they
279ef2f4505Sschwarze 			 * are significant and end the layout.
280ef2f4505Sschwarze 			 */
281ef2f4505Sschwarze 			return;
282ef2f4505Sschwarze 		case '_':
283acb996efSschwarze 			sp = newspan(tbl, ln, rp);
284eaa4dcb4Sschwarze 			sp->pos = TBL_SPAN_HORIZ;
28517e37242Sschwarze 			return;
286ef2f4505Sschwarze 		case '=':
287acb996efSschwarze 			sp = newspan(tbl, ln, rp);
288eaa4dcb4Sschwarze 			sp->pos = TBL_SPAN_DHORIZ;
28917e37242Sschwarze 			return;
290ef2f4505Sschwarze 		default:
291ef2f4505Sschwarze 			break;
292ef2f4505Sschwarze 		}
293393cb51eSschwarze 	}
2942791bd1cSschwarze 
295acb996efSschwarze 	/*
296acb996efSschwarze 	 * If the layout row contains nothing but horizontal lines,
297acb996efSschwarze 	 * allocate an empty span for it and assign the current span
298acb996efSschwarze 	 * to the next layout row accepting data.
299acb996efSschwarze 	 */
300acb996efSschwarze 
301acb996efSschwarze 	while (rp->next != NULL) {
302acb996efSschwarze 		if (rp->last->col + 1 < tbl->opts.cols)
303acb996efSschwarze 			break;
304acb996efSschwarze 		for (cp = rp->first; cp != NULL; cp = cp->next)
305acb996efSschwarze 			if (cp->pos != TBL_CELL_HORIZ &&
306acb996efSschwarze 			    cp->pos != TBL_CELL_DHORIZ)
307acb996efSschwarze 				break;
308acb996efSschwarze 		if (cp != NULL)
309acb996efSschwarze 			break;
310acb996efSschwarze 		sp = newspan(tbl, ln, rp);
311acb996efSschwarze 		sp->pos = TBL_SPAN_DATA;
312acb996efSschwarze 		rp = rp->next;
313acb996efSschwarze 	}
314acb996efSschwarze 
315acb996efSschwarze 	/* Process a real data row. */
316acb996efSschwarze 
317acb996efSschwarze 	sp = newspan(tbl, ln, rp);
318acb996efSschwarze 	sp->pos = TBL_SPAN_DATA;
31921da0636Sschwarze 	while (p[pos] != '\0')
320eaa4dcb4Sschwarze 		getdata(tbl, sp, ln, p, &pos);
321393cb51eSschwarze }
322