1*6167eca2Schristos /* Id: tbl_data.c,v 1.52 2019/02/09 16:00:39 schwarze Exp */
2c0d9444aSjoerg /*
348741257Sjoerg * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4*6167eca2Schristos * Copyright (c) 2011,2015,2017,2018,2019 Ingo Schwarze <schwarze@openbsd.org>
5c0d9444aSjoerg *
6c0d9444aSjoerg * Permission to use, copy, modify, and distribute this software for any
7c0d9444aSjoerg * purpose with or without fee is hereby granted, provided that the above
8c0d9444aSjoerg * copyright notice and this permission notice appear in all copies.
9c0d9444aSjoerg *
10c0d9444aSjoerg * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11c0d9444aSjoerg * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12c0d9444aSjoerg * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13c0d9444aSjoerg * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14c0d9444aSjoerg * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15c0d9444aSjoerg * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16c0d9444aSjoerg * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17c0d9444aSjoerg */
18c0d9444aSjoerg #include "config.h"
195c413d0cSchristos
205c413d0cSchristos #include <sys/types.h>
21c0d9444aSjoerg
22c0d9444aSjoerg #include <assert.h>
23c0d9444aSjoerg #include <ctype.h>
24*6167eca2Schristos #include <stdio.h>
25c0d9444aSjoerg #include <stdlib.h>
26c0d9444aSjoerg #include <string.h>
27c0d9444aSjoerg #include <time.h>
28c0d9444aSjoerg
295c413d0cSchristos #include "mandoc_aux.h"
30*6167eca2Schristos #include "mandoc.h"
31*6167eca2Schristos #include "tbl.h"
32c0d9444aSjoerg #include "libmandoc.h"
33*6167eca2Schristos #include "tbl_int.h"
34c0d9444aSjoerg
355c413d0cSchristos static void getdata(struct tbl_node *, struct tbl_span *,
36c0d9444aSjoerg int, const char *, int *);
3748741257Sjoerg static struct tbl_span *newspan(struct tbl_node *, int,
3848741257Sjoerg struct tbl_row *);
39c0d9444aSjoerg
405c413d0cSchristos
415c413d0cSchristos static void
getdata(struct tbl_node * tbl,struct tbl_span * dp,int ln,const char * p,int * pos)42f60f86a6Schristos getdata(struct tbl_node *tbl, struct tbl_span *dp,
43c0d9444aSjoerg int ln, const char *p, int *pos)
44c0d9444aSjoerg {
45*6167eca2Schristos struct tbl_dat *dat, *pdat;
46c0d9444aSjoerg struct tbl_cell *cp;
47*6167eca2Schristos struct tbl_span *pdp;
485c413d0cSchristos int sv;
49c0d9444aSjoerg
50*6167eca2Schristos /*
51*6167eca2Schristos * Determine the length of the string in the cell
52*6167eca2Schristos * and advance the parse point to the end of the cell.
53*6167eca2Schristos */
54*6167eca2Schristos
55*6167eca2Schristos sv = *pos;
56*6167eca2Schristos while (p[*pos] != '\0' && p[*pos] != tbl->opts.tab)
57*6167eca2Schristos (*pos)++;
58*6167eca2Schristos
595c413d0cSchristos /* Advance to the next layout cell, skipping spanners. */
60c0d9444aSjoerg
615c413d0cSchristos cp = dp->last == NULL ? dp->layout->first : dp->last->layout->next;
625c413d0cSchristos while (cp != NULL && cp->pos == TBL_CELL_SPAN)
63c0d9444aSjoerg cp = cp->next;
64c0d9444aSjoerg
6548741257Sjoerg /*
6614e7489eSchristos * If the current layout row is out of cells, allocate
6714e7489eSchristos * a new cell if another row of the table has at least
6814e7489eSchristos * this number of columns, or discard the input if we
6914e7489eSchristos * are beyond the last column of the table as a whole.
7048741257Sjoerg */
7148741257Sjoerg
725c413d0cSchristos if (cp == NULL) {
7314e7489eSchristos if (dp->layout->last->col + 1 < dp->opts->cols) {
7414e7489eSchristos cp = mandoc_calloc(1, sizeof(*cp));
7514e7489eSchristos cp->pos = TBL_CELL_LEFT;
7614e7489eSchristos dp->layout->last->next = cp;
7714e7489eSchristos cp->col = dp->layout->last->col + 1;
7814e7489eSchristos dp->layout->last = cp;
7914e7489eSchristos } else {
80*6167eca2Schristos mandoc_msg(MANDOCERR_TBLDATA_EXTRA,
81*6167eca2Schristos ln, sv, "%s", p + sv);
82*6167eca2Schristos while (p[*pos] != '\0')
8348741257Sjoerg (*pos)++;
845c413d0cSchristos return;
8548741257Sjoerg }
8614e7489eSchristos }
8748741257Sjoerg
88*6167eca2Schristos dat = mandoc_malloc(sizeof(*dat));
89c0d9444aSjoerg dat->layout = cp;
90*6167eca2Schristos dat->next = NULL;
91*6167eca2Schristos dat->string = NULL;
92*6167eca2Schristos dat->hspans = 0;
93*6167eca2Schristos dat->vspans = 0;
94*6167eca2Schristos dat->block = 0;
95c0d9444aSjoerg dat->pos = TBL_DATA_NONE;
96*6167eca2Schristos
97*6167eca2Schristos /*
98*6167eca2Schristos * Increment the number of vertical spans in a data cell above,
99*6167eca2Schristos * if this cell vertically extends one or more cells above.
100*6167eca2Schristos * The iteration must be done over data rows,
101*6167eca2Schristos * not over layout rows, because one layout row
102*6167eca2Schristos * can be reused for more than one data row.
103*6167eca2Schristos */
104*6167eca2Schristos
105*6167eca2Schristos if (cp->pos == TBL_CELL_DOWN ||
106*6167eca2Schristos (*pos - sv == 2 && p[sv] == '\\' && p[sv + 1] == '^')) {
107*6167eca2Schristos pdp = dp;
108*6167eca2Schristos while ((pdp = pdp->prev) != NULL) {
109*6167eca2Schristos pdat = pdp->first;
110*6167eca2Schristos while (pdat != NULL &&
111*6167eca2Schristos pdat->layout->col < dat->layout->col)
112*6167eca2Schristos pdat = pdat->next;
113*6167eca2Schristos if (pdat == NULL)
114*6167eca2Schristos break;
115*6167eca2Schristos if (pdat->layout->pos != TBL_CELL_DOWN &&
116*6167eca2Schristos strcmp(pdat->string, "\\^") != 0) {
117*6167eca2Schristos pdat->vspans++;
118*6167eca2Schristos break;
119*6167eca2Schristos }
120*6167eca2Schristos }
121*6167eca2Schristos }
122*6167eca2Schristos
123*6167eca2Schristos /*
124*6167eca2Schristos * Count the number of horizontal spans to the right of this cell.
125*6167eca2Schristos * This is purely a matter of the layout, independent of the data.
126*6167eca2Schristos */
127*6167eca2Schristos
1285c413d0cSchristos for (cp = cp->next; cp != NULL; cp = cp->next)
1295c413d0cSchristos if (cp->pos == TBL_CELL_SPAN)
130*6167eca2Schristos dat->hspans++;
13148741257Sjoerg else
13248741257Sjoerg break;
13348741257Sjoerg
134*6167eca2Schristos /* Append the new data cell to the data row. */
135*6167eca2Schristos
1365c413d0cSchristos if (dp->last == NULL)
1375c413d0cSchristos dp->first = dat;
1385c413d0cSchristos else
139c0d9444aSjoerg dp->last->next = dat;
140c0d9444aSjoerg dp->last = dat;
141c0d9444aSjoerg
142c0d9444aSjoerg /*
143c0d9444aSjoerg * Check for a continued-data scope opening. This consists of a
144c0d9444aSjoerg * trailing `T{' at the end of the line. Subsequent lines,
145c0d9444aSjoerg * until a standalone `T}', are included in our cell.
146c0d9444aSjoerg */
147c0d9444aSjoerg
1485c413d0cSchristos if (*pos - sv == 2 && p[sv] == 'T' && p[sv + 1] == '{') {
149c0d9444aSjoerg tbl->part = TBL_PART_CDATA;
1505c413d0cSchristos return;
151c0d9444aSjoerg }
152c0d9444aSjoerg
1535c413d0cSchristos dat->string = mandoc_strndup(p + sv, *pos - sv);
154c0d9444aSjoerg
155*6167eca2Schristos if (p[*pos] != '\0')
156c0d9444aSjoerg (*pos)++;
157c0d9444aSjoerg
158c0d9444aSjoerg if ( ! strcmp(dat->string, "_"))
159c0d9444aSjoerg dat->pos = TBL_DATA_HORIZ;
160c0d9444aSjoerg else if ( ! strcmp(dat->string, "="))
161c0d9444aSjoerg dat->pos = TBL_DATA_DHORIZ;
162c0d9444aSjoerg else if ( ! strcmp(dat->string, "\\_"))
163c0d9444aSjoerg dat->pos = TBL_DATA_NHORIZ;
164c0d9444aSjoerg else if ( ! strcmp(dat->string, "\\="))
165c0d9444aSjoerg dat->pos = TBL_DATA_NDHORIZ;
166c0d9444aSjoerg else
167c0d9444aSjoerg dat->pos = TBL_DATA_DATA;
168c0d9444aSjoerg
1695c413d0cSchristos if ((dat->layout->pos == TBL_CELL_HORIZ ||
1705c413d0cSchristos dat->layout->pos == TBL_CELL_DHORIZ ||
1715c413d0cSchristos dat->layout->pos == TBL_CELL_DOWN) &&
1725c413d0cSchristos dat->pos == TBL_DATA_DATA && *dat->string != '\0')
1735c413d0cSchristos mandoc_msg(MANDOCERR_TBLDATA_SPAN,
174*6167eca2Schristos ln, sv, "%s", dat->string);
175c0d9444aSjoerg }
176c0d9444aSjoerg
17714e7489eSchristos void
tbl_cdata(struct tbl_node * tbl,int ln,const char * p,int pos)1785c413d0cSchristos tbl_cdata(struct tbl_node *tbl, int ln, const char *p, int pos)
179c0d9444aSjoerg {
180c0d9444aSjoerg struct tbl_dat *dat;
181c0d9444aSjoerg size_t sz;
182c0d9444aSjoerg
183c0d9444aSjoerg dat = tbl->last_span->last;
184c0d9444aSjoerg
185c0d9444aSjoerg if (p[pos] == 'T' && p[pos + 1] == '}') {
186c0d9444aSjoerg pos += 2;
187c0d9444aSjoerg if (p[pos] == tbl->opts.tab) {
188c0d9444aSjoerg tbl->part = TBL_PART_DATA;
189c0d9444aSjoerg pos++;
190f47368cfSchristos while (p[pos] != '\0')
1915c413d0cSchristos getdata(tbl, tbl->last_span, ln, p, &pos);
19214e7489eSchristos return;
1935c413d0cSchristos } else if (p[pos] == '\0') {
194c0d9444aSjoerg tbl->part = TBL_PART_DATA;
19514e7489eSchristos return;
196c0d9444aSjoerg }
197c0d9444aSjoerg
198c0d9444aSjoerg /* Fallthrough: T} is part of a word. */
199c0d9444aSjoerg }
200c0d9444aSjoerg
20148741257Sjoerg dat->pos = TBL_DATA_DATA;
20214e7489eSchristos dat->block = 1;
20348741257Sjoerg
2045c413d0cSchristos if (dat->string != NULL) {
2055c413d0cSchristos sz = strlen(p + pos) + strlen(dat->string) + 2;
206c0d9444aSjoerg dat->string = mandoc_realloc(dat->string, sz);
2075c413d0cSchristos (void)strlcat(dat->string, " ", sz);
2085c413d0cSchristos (void)strlcat(dat->string, p + pos, sz);
209c0d9444aSjoerg } else
2105c413d0cSchristos dat->string = mandoc_strdup(p + pos);
211c0d9444aSjoerg
2125c413d0cSchristos if (dat->layout->pos == TBL_CELL_DOWN)
213*6167eca2Schristos mandoc_msg(MANDOCERR_TBLDATA_SPAN,
214*6167eca2Schristos ln, pos, "%s", dat->string);
215c0d9444aSjoerg }
216c0d9444aSjoerg
21748741257Sjoerg static struct tbl_span *
newspan(struct tbl_node * tbl,int line,struct tbl_row * rp)21848741257Sjoerg newspan(struct tbl_node *tbl, int line, struct tbl_row *rp)
21948741257Sjoerg {
22048741257Sjoerg struct tbl_span *dp;
22148741257Sjoerg
2225c413d0cSchristos dp = mandoc_calloc(1, sizeof(*dp));
22348741257Sjoerg dp->line = line;
224603fc4ebSjoerg dp->opts = &tbl->opts;
22548741257Sjoerg dp->layout = rp;
2265c413d0cSchristos dp->prev = tbl->last_span;
22748741257Sjoerg
2285c413d0cSchristos if (dp->prev == NULL) {
2295c413d0cSchristos tbl->first_span = dp;
23048741257Sjoerg tbl->current_span = NULL;
2315c413d0cSchristos } else
2325c413d0cSchristos dp->prev->next = dp;
2335c413d0cSchristos tbl->last_span = dp;
23448741257Sjoerg
235f47368cfSchristos return dp;
23648741257Sjoerg }
23748741257Sjoerg
2385c413d0cSchristos void
tbl_data(struct tbl_node * tbl,int ln,const char * p,int pos)2395c413d0cSchristos tbl_data(struct tbl_node *tbl, int ln, const char *p, int pos)
240c0d9444aSjoerg {
241c0d9444aSjoerg struct tbl_row *rp;
24214e7489eSchristos struct tbl_cell *cp;
24314e7489eSchristos struct tbl_span *sp;
244c0d9444aSjoerg
24514e7489eSchristos rp = (sp = tbl->last_span) == NULL ? tbl->first_row :
24614e7489eSchristos sp->pos == TBL_SPAN_DATA && sp->layout->next != NULL ?
24714e7489eSchristos sp->layout->next : sp->layout;
248c0d9444aSjoerg
24914e7489eSchristos assert(rp != NULL);
250c0d9444aSjoerg
251*6167eca2Schristos if (p[1] == '\0') {
252*6167eca2Schristos switch (p[0]) {
253*6167eca2Schristos case '.':
254*6167eca2Schristos /*
255*6167eca2Schristos * Empty request lines must be handled here
256*6167eca2Schristos * and cannot be discarded in roff_parseln()
257*6167eca2Schristos * because in the layout section, they
258*6167eca2Schristos * are significant and end the layout.
259*6167eca2Schristos */
260*6167eca2Schristos return;
261*6167eca2Schristos case '_':
26214e7489eSchristos sp = newspan(tbl, ln, rp);
26314e7489eSchristos sp->pos = TBL_SPAN_HORIZ;
2645c413d0cSchristos return;
265*6167eca2Schristos case '=':
26614e7489eSchristos sp = newspan(tbl, ln, rp);
26714e7489eSchristos sp->pos = TBL_SPAN_DHORIZ;
2685c413d0cSchristos return;
269*6167eca2Schristos default:
270*6167eca2Schristos break;
271*6167eca2Schristos }
272c0d9444aSjoerg }
273c0d9444aSjoerg
27414e7489eSchristos /*
27514e7489eSchristos * If the layout row contains nothing but horizontal lines,
27614e7489eSchristos * allocate an empty span for it and assign the current span
27714e7489eSchristos * to the next layout row accepting data.
27814e7489eSchristos */
279c0d9444aSjoerg
28014e7489eSchristos while (rp->next != NULL) {
28114e7489eSchristos if (rp->last->col + 1 < tbl->opts.cols)
28214e7489eSchristos break;
28314e7489eSchristos for (cp = rp->first; cp != NULL; cp = cp->next)
28414e7489eSchristos if (cp->pos != TBL_CELL_HORIZ &&
28514e7489eSchristos cp->pos != TBL_CELL_DHORIZ)
28614e7489eSchristos break;
28714e7489eSchristos if (cp != NULL)
28814e7489eSchristos break;
28914e7489eSchristos sp = newspan(tbl, ln, rp);
29014e7489eSchristos sp->pos = TBL_SPAN_DATA;
29114e7489eSchristos rp = rp->next;
29214e7489eSchristos }
29314e7489eSchristos
29414e7489eSchristos /* Process a real data row. */
29514e7489eSchristos
29614e7489eSchristos sp = newspan(tbl, ln, rp);
29714e7489eSchristos sp->pos = TBL_SPAN_DATA;
2985c413d0cSchristos while (p[pos] != '\0')
29914e7489eSchristos getdata(tbl, sp, ln, p, &pos);
300c0d9444aSjoerg }
301