1*2fb659f9Sschwarze /* $OpenBSD: tbl_data.c,v 1.46 2021/09/10 13:23:44 schwarze Exp $ */
2393cb51eSschwarze /*
32791bd1cSschwarze * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
41a285ef1Sschwarze * Copyright (c) 2011,2015,2017-2019,2021 Ingo Schwarze <schwarze@openbsd.org>
5393cb51eSschwarze *
6393cb51eSschwarze * Permission to use, copy, modify, and distribute this software for any
7393cb51eSschwarze * purpose with or without fee is hereby granted, provided that the above
8393cb51eSschwarze * copyright notice and this permission notice appear in all copies.
9393cb51eSschwarze *
10393cb51eSschwarze * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11393cb51eSschwarze * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12393cb51eSschwarze * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13393cb51eSschwarze * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14393cb51eSschwarze * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15393cb51eSschwarze * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16393cb51eSschwarze * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17393cb51eSschwarze */
1830997c59Sschwarze #include <sys/types.h>
1930997c59Sschwarze
20393cb51eSschwarze #include <assert.h>
21393cb51eSschwarze #include <ctype.h>
22ebc37634Sschwarze #include <stdint.h>
23e501e731Sschwarze #include <stdio.h>
24393cb51eSschwarze #include <stdlib.h>
25393cb51eSschwarze #include <string.h>
262791bd1cSschwarze #include <time.h>
27393cb51eSschwarze
284f4f7972Sschwarze #include "mandoc_aux.h"
29fae2491eSschwarze #include "mandoc.h"
30fae2491eSschwarze #include "tbl.h"
312791bd1cSschwarze #include "libmandoc.h"
32fb382a01Sschwarze #include "tbl_int.h"
33393cb51eSschwarze
/* Parse a single data cell and append it to a data row. */
static void		 getdata(struct tbl_node *tbl, struct tbl_span *dp,
				int ln, const char *p, int *pos);
/* Allocate a data row and append it to the table's list of rows. */
static struct tbl_span	*newspan(struct tbl_node *tbl, int line,
				struct tbl_row *rp);
38393cb51eSschwarze
3949aff9f8Sschwarze
4017e37242Sschwarze static void
getdata(struct tbl_node * tbl,struct tbl_span * dp,int ln,const char * p,int * pos)416ff6a9aaSschwarze getdata(struct tbl_node *tbl, struct tbl_span *dp,
422791bd1cSschwarze int ln, const char *p, int *pos)
43393cb51eSschwarze {
44a93944b2Sschwarze struct tbl_dat *dat, *pdat;
452791bd1cSschwarze struct tbl_cell *cp;
46a93944b2Sschwarze struct tbl_span *pdp;
471a285ef1Sschwarze const char *ccp;
48fbc27a2fSschwarze int startpos, endpos;
49393cb51eSschwarze
507a5a8f14Sschwarze /*
517a5a8f14Sschwarze * Determine the length of the string in the cell
527a5a8f14Sschwarze * and advance the parse point to the end of the cell.
537a5a8f14Sschwarze */
547a5a8f14Sschwarze
55fbc27a2fSschwarze startpos = *pos;
56fbc27a2fSschwarze ccp = p + startpos;
571a285ef1Sschwarze while (*ccp != '\0' && *ccp != tbl->opts.tab)
581a285ef1Sschwarze if (*ccp++ == '\\')
591a285ef1Sschwarze mandoc_escape(&ccp, NULL, NULL);
601a285ef1Sschwarze *pos = ccp - p;
617a5a8f14Sschwarze
625f6d1ba3Sschwarze /* Advance to the next layout cell, skipping spanners. */
635f6d1ba3Sschwarze
6421da0636Sschwarze cp = dp->last == NULL ? dp->layout->first : dp->last->layout->next;
6521da0636Sschwarze while (cp != NULL && cp->pos == TBL_CELL_SPAN)
662791bd1cSschwarze cp = cp->next;
672791bd1cSschwarze
688351ebcfSschwarze /*
69eaa4dcb4Sschwarze * If the current layout row is out of cells, allocate
70eaa4dcb4Sschwarze * a new cell if another row of the table has at least
71eaa4dcb4Sschwarze * this number of columns, or discard the input if we
72eaa4dcb4Sschwarze * are beyond the last column of the table as a whole.
738351ebcfSschwarze */
748351ebcfSschwarze
7521da0636Sschwarze if (cp == NULL) {
76eaa4dcb4Sschwarze if (dp->layout->last->col + 1 < dp->opts->cols) {
77eaa4dcb4Sschwarze cp = mandoc_calloc(1, sizeof(*cp));
78eaa4dcb4Sschwarze cp->pos = TBL_CELL_LEFT;
797d063611Sschwarze cp->font = ESCAPE_FONTROMAN;
80ebc37634Sschwarze cp->spacing = SIZE_MAX;
81eaa4dcb4Sschwarze dp->layout->last->next = cp;
82eaa4dcb4Sschwarze cp->col = dp->layout->last->col + 1;
83eaa4dcb4Sschwarze dp->layout->last = cp;
84eaa4dcb4Sschwarze } else {
85a5a5f808Sschwarze mandoc_msg(MANDOCERR_TBLDATA_EXTRA,
86fbc27a2fSschwarze ln, startpos, "%s", p + startpos);
877a5a8f14Sschwarze while (p[*pos] != '\0')
888351ebcfSschwarze (*pos)++;
8917e37242Sschwarze return;
908351ebcfSschwarze }
91eaa4dcb4Sschwarze }
928351ebcfSschwarze
93a93944b2Sschwarze dat = mandoc_malloc(sizeof(*dat));
942791bd1cSschwarze dat->layout = cp;
95a93944b2Sschwarze dat->next = NULL;
96a93944b2Sschwarze dat->string = NULL;
97a93944b2Sschwarze dat->hspans = 0;
98a93944b2Sschwarze dat->vspans = 0;
99a93944b2Sschwarze dat->block = 0;
1002791bd1cSschwarze dat->pos = TBL_DATA_NONE;
101a93944b2Sschwarze
102a93944b2Sschwarze /*
103a93944b2Sschwarze * Increment the number of vertical spans in a data cell above,
104a93944b2Sschwarze * if this cell vertically extends one or more cells above.
105a93944b2Sschwarze * The iteration must be done over data rows,
106a93944b2Sschwarze * not over layout rows, because one layout row
107a93944b2Sschwarze * can be reused for more than one data row.
108a93944b2Sschwarze */
109a93944b2Sschwarze
1107a5a8f14Sschwarze if (cp->pos == TBL_CELL_DOWN ||
111fbc27a2fSschwarze (*pos - startpos == 2 &&
112fbc27a2fSschwarze p[startpos] == '\\' && p[startpos + 1] == '^')) {
113a93944b2Sschwarze pdp = dp;
114a93944b2Sschwarze while ((pdp = pdp->prev) != NULL) {
115a93944b2Sschwarze pdat = pdp->first;
116a93944b2Sschwarze while (pdat != NULL &&
117a93944b2Sschwarze pdat->layout->col < dat->layout->col)
118a93944b2Sschwarze pdat = pdat->next;
119a93944b2Sschwarze if (pdat == NULL)
120a93944b2Sschwarze break;
1217a5a8f14Sschwarze if (pdat->layout->pos != TBL_CELL_DOWN &&
1227a5a8f14Sschwarze strcmp(pdat->string, "\\^") != 0) {
123a93944b2Sschwarze pdat->vspans++;
124a93944b2Sschwarze break;
125a93944b2Sschwarze }
126a93944b2Sschwarze }
127a93944b2Sschwarze }
128a93944b2Sschwarze
129a93944b2Sschwarze /*
130a93944b2Sschwarze * Count the number of horizontal spans to the right of this cell.
131a93944b2Sschwarze * This is purely a matter of the layout, independent of the data.
132a93944b2Sschwarze */
133a93944b2Sschwarze
13421da0636Sschwarze for (cp = cp->next; cp != NULL; cp = cp->next)
13521da0636Sschwarze if (cp->pos == TBL_CELL_SPAN)
136a93944b2Sschwarze dat->hspans++;
1378351ebcfSschwarze else
1388351ebcfSschwarze break;
1398351ebcfSschwarze
140a93944b2Sschwarze /* Append the new data cell to the data row. */
141a93944b2Sschwarze
14221da0636Sschwarze if (dp->last == NULL)
14321da0636Sschwarze dp->first = dat;
14421da0636Sschwarze else
1452791bd1cSschwarze dp->last->next = dat;
1462791bd1cSschwarze dp->last = dat;
1472791bd1cSschwarze
148*2fb659f9Sschwarze /* Strip leading and trailing spaces, if requested. */
149393cb51eSschwarze
150fbc27a2fSschwarze endpos = *pos;
151fbc27a2fSschwarze if (dp->opts->opts & TBL_OPT_NOSPACE) {
152fbc27a2fSschwarze while (p[startpos] == ' ')
153fbc27a2fSschwarze startpos++;
154fbc27a2fSschwarze while (endpos > startpos && p[endpos - 1] == ' ')
155fbc27a2fSschwarze endpos--;
156fbc27a2fSschwarze }
157*2fb659f9Sschwarze
158*2fb659f9Sschwarze /*
159*2fb659f9Sschwarze * Check for a continued-data scope opening. This consists of a
160*2fb659f9Sschwarze * trailing `T{' at the end of the line. Subsequent lines,
161*2fb659f9Sschwarze * until a standalone `T}', are included in our cell.
162*2fb659f9Sschwarze */
163*2fb659f9Sschwarze
164*2fb659f9Sschwarze if (endpos - startpos == 2 &&
165*2fb659f9Sschwarze p[startpos] == 'T' && p[startpos + 1] == '{') {
166*2fb659f9Sschwarze tbl->part = TBL_PART_CDATA;
167*2fb659f9Sschwarze return;
168*2fb659f9Sschwarze }
169*2fb659f9Sschwarze
170fbc27a2fSschwarze dat->string = mandoc_strndup(p + startpos, endpos - startpos);
171393cb51eSschwarze
1727a5a8f14Sschwarze if (p[*pos] != '\0')
1732791bd1cSschwarze (*pos)++;
174393cb51eSschwarze
175393cb51eSschwarze if ( ! strcmp(dat->string, "_"))
1762791bd1cSschwarze dat->pos = TBL_DATA_HORIZ;
177393cb51eSschwarze else if ( ! strcmp(dat->string, "="))
1782791bd1cSschwarze dat->pos = TBL_DATA_DHORIZ;
179393cb51eSschwarze else if ( ! strcmp(dat->string, "\\_"))
1802791bd1cSschwarze dat->pos = TBL_DATA_NHORIZ;
181393cb51eSschwarze else if ( ! strcmp(dat->string, "\\="))
1822791bd1cSschwarze dat->pos = TBL_DATA_NDHORIZ;
183393cb51eSschwarze else
1842791bd1cSschwarze dat->pos = TBL_DATA_DATA;
1852791bd1cSschwarze
18621da0636Sschwarze if ((dat->layout->pos == TBL_CELL_HORIZ ||
18721da0636Sschwarze dat->layout->pos == TBL_CELL_DHORIZ ||
18821da0636Sschwarze dat->layout->pos == TBL_CELL_DOWN) &&
18921da0636Sschwarze dat->pos == TBL_DATA_DATA && *dat->string != '\0')
19092ff8da6Sschwarze mandoc_msg(MANDOCERR_TBLDATA_SPAN,
191fbc27a2fSschwarze ln, startpos, "%s", dat->string);
192393cb51eSschwarze }
193393cb51eSschwarze
194d93f8561Sschwarze void
tbl_cdata(struct tbl_node * tbl,int ln,const char * p,int pos)19539aede77Sschwarze tbl_cdata(struct tbl_node *tbl, int ln, const char *p, int pos)
1962791bd1cSschwarze {
1972791bd1cSschwarze struct tbl_dat *dat;
1982791bd1cSschwarze size_t sz;
199ec04407bSschwarze
200ec04407bSschwarze dat = tbl->last_span->last;
201ec04407bSschwarze
202ec04407bSschwarze if (p[pos] == 'T' && p[pos + 1] == '}') {
203ec04407bSschwarze pos += 2;
20486dcadabSschwarze if (tbl->opts.opts & TBL_OPT_NOSPACE)
20586dcadabSschwarze while (p[pos] == ' ')
20686dcadabSschwarze pos++;
207ec04407bSschwarze if (p[pos] == tbl->opts.tab) {
208ec04407bSschwarze tbl->part = TBL_PART_DATA;
209ec04407bSschwarze pos++;
2103569c652Sschwarze while (p[pos] != '\0')
21117e37242Sschwarze getdata(tbl, tbl->last_span, ln, p, &pos);
212d93f8561Sschwarze return;
21321da0636Sschwarze } else if (p[pos] == '\0') {
2142791bd1cSschwarze tbl->part = TBL_PART_DATA;
215d93f8561Sschwarze return;
2162791bd1cSschwarze }
2172791bd1cSschwarze
218ec04407bSschwarze /* Fallthrough: T} is part of a word. */
219ec04407bSschwarze }
2202791bd1cSschwarze
2218351ebcfSschwarze dat->pos = TBL_DATA_DATA;
2222c3e66c4Sschwarze dat->block = 1;
2238351ebcfSschwarze
22421da0636Sschwarze if (dat->string != NULL) {
22592ff8da6Sschwarze sz = strlen(p + pos) + strlen(dat->string) + 2;
2262791bd1cSschwarze dat->string = mandoc_realloc(dat->string, sz);
2270b2f1307Sschwarze (void)strlcat(dat->string, " ", sz);
22892ff8da6Sschwarze (void)strlcat(dat->string, p + pos, sz);
2292791bd1cSschwarze } else
23092ff8da6Sschwarze dat->string = mandoc_strdup(p + pos);
2312791bd1cSschwarze
23221da0636Sschwarze if (dat->layout->pos == TBL_CELL_DOWN)
233a5a5f808Sschwarze mandoc_msg(MANDOCERR_TBLDATA_SPAN,
234a5a5f808Sschwarze ln, pos, "%s", dat->string);
2352791bd1cSschwarze }
236393cb51eSschwarze
237a7f5ddf9Sschwarze static struct tbl_span *
newspan(struct tbl_node * tbl,int line,struct tbl_row * rp)2382733b5d3Sschwarze newspan(struct tbl_node *tbl, int line, struct tbl_row *rp)
239a7f5ddf9Sschwarze {
240a7f5ddf9Sschwarze struct tbl_span *dp;
241a7f5ddf9Sschwarze
24221da0636Sschwarze dp = mandoc_calloc(1, sizeof(*dp));
2432733b5d3Sschwarze dp->line = line;
244c7497e73Sschwarze dp->opts = &tbl->opts;
245a7f5ddf9Sschwarze dp->layout = rp;
246fd9b947eSschwarze dp->prev = tbl->last_span;
247a7f5ddf9Sschwarze
248fd9b947eSschwarze if (dp->prev == NULL) {
249fd9b947eSschwarze tbl->first_span = dp;
2507374e4feSschwarze tbl->current_span = NULL;
251fd9b947eSschwarze } else
252fd9b947eSschwarze dp->prev->next = dp;
253fd9b947eSschwarze tbl->last_span = dp;
254a7f5ddf9Sschwarze
255526e306bSschwarze return dp;
256a7f5ddf9Sschwarze }
257a7f5ddf9Sschwarze
25817e37242Sschwarze void
tbl_data(struct tbl_node * tbl,int ln,const char * p,int pos)25939aede77Sschwarze tbl_data(struct tbl_node *tbl, int ln, const char *p, int pos)
260393cb51eSschwarze {
2612791bd1cSschwarze struct tbl_row *rp;
262eaa4dcb4Sschwarze struct tbl_cell *cp;
263acb996efSschwarze struct tbl_span *sp;
264393cb51eSschwarze
26561deec73Sschwarze for (sp = tbl->last_span; sp != NULL; sp = sp->prev)
26661deec73Sschwarze if (sp->pos == TBL_SPAN_DATA)
26761deec73Sschwarze break;
26861deec73Sschwarze rp = sp == NULL ? tbl->first_row :
26961deec73Sschwarze sp->layout->next == NULL ? sp->layout : sp->layout->next;
270eaa4dcb4Sschwarze assert(rp != NULL);
2718351ebcfSschwarze
272ef2f4505Sschwarze if (p[1] == '\0') {
273ef2f4505Sschwarze switch (p[0]) {
274ef2f4505Sschwarze case '.':
275ef2f4505Sschwarze /*
276ef2f4505Sschwarze * Empty request lines must be handled here
277ef2f4505Sschwarze * and cannot be discarded in roff_parseln()
278ef2f4505Sschwarze * because in the layout section, they
279ef2f4505Sschwarze * are significant and end the layout.
280ef2f4505Sschwarze */
281ef2f4505Sschwarze return;
282ef2f4505Sschwarze case '_':
283acb996efSschwarze sp = newspan(tbl, ln, rp);
284eaa4dcb4Sschwarze sp->pos = TBL_SPAN_HORIZ;
28517e37242Sschwarze return;
286ef2f4505Sschwarze case '=':
287acb996efSschwarze sp = newspan(tbl, ln, rp);
288eaa4dcb4Sschwarze sp->pos = TBL_SPAN_DHORIZ;
28917e37242Sschwarze return;
290ef2f4505Sschwarze default:
291ef2f4505Sschwarze break;
292ef2f4505Sschwarze }
293393cb51eSschwarze }
2942791bd1cSschwarze
295acb996efSschwarze /*
296acb996efSschwarze * If the layout row contains nothing but horizontal lines,
297acb996efSschwarze * allocate an empty span for it and assign the current span
298acb996efSschwarze * to the next layout row accepting data.
299acb996efSschwarze */
300acb996efSschwarze
301acb996efSschwarze while (rp->next != NULL) {
302acb996efSschwarze if (rp->last->col + 1 < tbl->opts.cols)
303acb996efSschwarze break;
304acb996efSschwarze for (cp = rp->first; cp != NULL; cp = cp->next)
305acb996efSschwarze if (cp->pos != TBL_CELL_HORIZ &&
306acb996efSschwarze cp->pos != TBL_CELL_DHORIZ)
307acb996efSschwarze break;
308acb996efSschwarze if (cp != NULL)
309acb996efSschwarze break;
310acb996efSschwarze sp = newspan(tbl, ln, rp);
311acb996efSschwarze sp->pos = TBL_SPAN_DATA;
312acb996efSschwarze rp = rp->next;
313acb996efSschwarze }
314acb996efSschwarze
315acb996efSschwarze /* Process a real data row. */
316acb996efSschwarze
317acb996efSschwarze sp = newspan(tbl, ln, rp);
318acb996efSschwarze sp->pos = TBL_SPAN_DATA;
31921da0636Sschwarze while (p[pos] != '\0')
320eaa4dcb4Sschwarze getdata(tbl, sp, ln, p, &pos);
321393cb51eSschwarze }
322