1*99db7d0eSSascha Wildner /* $Id: tbl_data.c,v 1.59 2021/09/10 13:24:38 schwarze Exp $ */
280387638SSascha Wildner /*
360e1e752SSascha Wildner * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4*99db7d0eSSascha Wildner * Copyright (c) 2011,2015,2017-2019,2021 Ingo Schwarze <schwarze@openbsd.org>
580387638SSascha Wildner *
680387638SSascha Wildner * Permission to use, copy, modify, and distribute this software for any
780387638SSascha Wildner * purpose with or without fee is hereby granted, provided that the above
880387638SSascha Wildner * copyright notice and this permission notice appear in all copies.
980387638SSascha Wildner *
1080387638SSascha Wildner * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1180387638SSascha Wildner * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1280387638SSascha Wildner * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1380387638SSascha Wildner * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1480387638SSascha Wildner * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1580387638SSascha Wildner * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1680387638SSascha Wildner * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1780387638SSascha Wildner */
1880387638SSascha Wildner #include "config.h"
1954ba9607SSascha Wildner
2054ba9607SSascha Wildner #include <sys/types.h>
2180387638SSascha Wildner
2280387638SSascha Wildner #include <assert.h>
2380387638SSascha Wildner #include <ctype.h>
24*99db7d0eSSascha Wildner #include <stdint.h>
2554ba9607SSascha Wildner #include <stdio.h>
2680387638SSascha Wildner #include <stdlib.h>
2780387638SSascha Wildner #include <string.h>
2880387638SSascha Wildner #include <time.h>
2980387638SSascha Wildner
30070c62a6SFranco Fichtner #include "mandoc_aux.h"
3154ba9607SSascha Wildner #include "mandoc.h"
3254ba9607SSascha Wildner #include "tbl.h"
3380387638SSascha Wildner #include "libmandoc.h"
3454ba9607SSascha Wildner #include "tbl_int.h"
3580387638SSascha Wildner
3654ba9607SSascha Wildner static void getdata(struct tbl_node *, struct tbl_span *,
3780387638SSascha Wildner int, const char *, int *);
3860e1e752SSascha Wildner static struct tbl_span *newspan(struct tbl_node *, int,
3960e1e752SSascha Wildner struct tbl_row *);
4080387638SSascha Wildner
41070c62a6SFranco Fichtner
4254ba9607SSascha Wildner static void
getdata(struct tbl_node * tbl,struct tbl_span * dp,int ln,const char * p,int * pos)43070c62a6SFranco Fichtner getdata(struct tbl_node *tbl, struct tbl_span *dp,
4480387638SSascha Wildner int ln, const char *p, int *pos)
4580387638SSascha Wildner {
4654ba9607SSascha Wildner struct tbl_dat *dat, *pdat;
4780387638SSascha Wildner struct tbl_cell *cp;
4854ba9607SSascha Wildner struct tbl_span *pdp;
49*99db7d0eSSascha Wildner const char *ccp;
50*99db7d0eSSascha Wildner int startpos, endpos;
5180387638SSascha Wildner
5280387638SSascha Wildner /*
5354ba9607SSascha Wildner * Determine the length of the string in the cell
5454ba9607SSascha Wildner * and advance the parse point to the end of the cell.
5580387638SSascha Wildner */
5680387638SSascha Wildner
57*99db7d0eSSascha Wildner startpos = *pos;
58*99db7d0eSSascha Wildner ccp = p + startpos;
59*99db7d0eSSascha Wildner while (*ccp != '\0' && *ccp != tbl->opts.tab)
60*99db7d0eSSascha Wildner if (*ccp++ == '\\')
61*99db7d0eSSascha Wildner mandoc_escape(&ccp, NULL, NULL);
62*99db7d0eSSascha Wildner *pos = ccp - p;
6354ba9607SSascha Wildner
6454ba9607SSascha Wildner /* Advance to the next layout cell, skipping spanners. */
6554ba9607SSascha Wildner
6654ba9607SSascha Wildner cp = dp->last == NULL ? dp->layout->first : dp->last->layout->next;
6754ba9607SSascha Wildner while (cp != NULL && cp->pos == TBL_CELL_SPAN)
6880387638SSascha Wildner cp = cp->next;
6980387638SSascha Wildner
7060e1e752SSascha Wildner /*
7154ba9607SSascha Wildner * If the current layout row is out of cells, allocate
7254ba9607SSascha Wildner * a new cell if another row of the table has at least
7354ba9607SSascha Wildner * this number of columns, or discard the input if we
7454ba9607SSascha Wildner * are beyond the last column of the table as a whole.
7560e1e752SSascha Wildner */
7660e1e752SSascha Wildner
7754ba9607SSascha Wildner if (cp == NULL) {
7854ba9607SSascha Wildner if (dp->layout->last->col + 1 < dp->opts->cols) {
7954ba9607SSascha Wildner cp = mandoc_calloc(1, sizeof(*cp));
8054ba9607SSascha Wildner cp->pos = TBL_CELL_LEFT;
81*99db7d0eSSascha Wildner cp->font = ESCAPE_FONTROMAN;
82*99db7d0eSSascha Wildner cp->spacing = SIZE_MAX;
8354ba9607SSascha Wildner dp->layout->last->next = cp;
8454ba9607SSascha Wildner cp->col = dp->layout->last->col + 1;
8554ba9607SSascha Wildner dp->layout->last = cp;
8654ba9607SSascha Wildner } else {
8754ba9607SSascha Wildner mandoc_msg(MANDOCERR_TBLDATA_EXTRA,
88*99db7d0eSSascha Wildner ln, startpos, "%s", p + startpos);
8954ba9607SSascha Wildner while (p[*pos] != '\0')
9060e1e752SSascha Wildner (*pos)++;
9154ba9607SSascha Wildner return;
9254ba9607SSascha Wildner }
9360e1e752SSascha Wildner }
9460e1e752SSascha Wildner
9554ba9607SSascha Wildner dat = mandoc_malloc(sizeof(*dat));
9680387638SSascha Wildner dat->layout = cp;
9754ba9607SSascha Wildner dat->next = NULL;
9854ba9607SSascha Wildner dat->string = NULL;
9954ba9607SSascha Wildner dat->hspans = 0;
10054ba9607SSascha Wildner dat->vspans = 0;
10154ba9607SSascha Wildner dat->block = 0;
10280387638SSascha Wildner dat->pos = TBL_DATA_NONE;
10380387638SSascha Wildner
10454ba9607SSascha Wildner /*
10554ba9607SSascha Wildner * Increment the number of vertical spans in a data cell above,
10654ba9607SSascha Wildner * if this cell vertically extends one or more cells above.
10754ba9607SSascha Wildner * The iteration must be done over data rows,
10854ba9607SSascha Wildner * not over layout rows, because one layout row
10954ba9607SSascha Wildner * can be reused for more than one data row.
11054ba9607SSascha Wildner */
11160e1e752SSascha Wildner
11254ba9607SSascha Wildner if (cp->pos == TBL_CELL_DOWN ||
113*99db7d0eSSascha Wildner (*pos - startpos == 2 &&
114*99db7d0eSSascha Wildner p[startpos] == '\\' && p[startpos + 1] == '^')) {
11554ba9607SSascha Wildner pdp = dp;
11654ba9607SSascha Wildner while ((pdp = pdp->prev) != NULL) {
11754ba9607SSascha Wildner pdat = pdp->first;
11854ba9607SSascha Wildner while (pdat != NULL &&
11954ba9607SSascha Wildner pdat->layout->col < dat->layout->col)
12054ba9607SSascha Wildner pdat = pdat->next;
12154ba9607SSascha Wildner if (pdat == NULL)
12254ba9607SSascha Wildner break;
12354ba9607SSascha Wildner if (pdat->layout->pos != TBL_CELL_DOWN &&
12454ba9607SSascha Wildner strcmp(pdat->string, "\\^") != 0) {
12554ba9607SSascha Wildner pdat->vspans++;
12654ba9607SSascha Wildner break;
12754ba9607SSascha Wildner }
12854ba9607SSascha Wildner }
12954ba9607SSascha Wildner }
13054ba9607SSascha Wildner
13154ba9607SSascha Wildner /*
13254ba9607SSascha Wildner * Count the number of horizontal spans to the right of this cell.
13354ba9607SSascha Wildner * This is purely a matter of the layout, independent of the data.
13454ba9607SSascha Wildner */
13554ba9607SSascha Wildner
13654ba9607SSascha Wildner for (cp = cp->next; cp != NULL; cp = cp->next)
13754ba9607SSascha Wildner if (cp->pos == TBL_CELL_SPAN)
13854ba9607SSascha Wildner dat->hspans++;
13960e1e752SSascha Wildner else
14060e1e752SSascha Wildner break;
14160e1e752SSascha Wildner
14254ba9607SSascha Wildner /* Append the new data cell to the data row. */
14380387638SSascha Wildner
14454ba9607SSascha Wildner if (dp->last == NULL)
14554ba9607SSascha Wildner dp->first = dat;
14654ba9607SSascha Wildner else
14780387638SSascha Wildner dp->last->next = dat;
14880387638SSascha Wildner dp->last = dat;
14980387638SSascha Wildner
150*99db7d0eSSascha Wildner /* Strip leading and trailing spaces, if requested. */
151*99db7d0eSSascha Wildner
152*99db7d0eSSascha Wildner endpos = *pos;
153*99db7d0eSSascha Wildner if (dp->opts->opts & TBL_OPT_NOSPACE) {
154*99db7d0eSSascha Wildner while (p[startpos] == ' ')
155*99db7d0eSSascha Wildner startpos++;
156*99db7d0eSSascha Wildner while (endpos > startpos && p[endpos - 1] == ' ')
157*99db7d0eSSascha Wildner endpos--;
158*99db7d0eSSascha Wildner }
159*99db7d0eSSascha Wildner
16080387638SSascha Wildner /*
16180387638SSascha Wildner * Check for a continued-data scope opening. This consists of a
16280387638SSascha Wildner * trailing `T{' at the end of the line. Subsequent lines,
16380387638SSascha Wildner * until a standalone `T}', are included in our cell.
16480387638SSascha Wildner */
16580387638SSascha Wildner
166*99db7d0eSSascha Wildner if (endpos - startpos == 2 &&
167*99db7d0eSSascha Wildner p[startpos] == 'T' && p[startpos + 1] == '{') {
16880387638SSascha Wildner tbl->part = TBL_PART_CDATA;
16954ba9607SSascha Wildner return;
17080387638SSascha Wildner }
17180387638SSascha Wildner
172*99db7d0eSSascha Wildner dat->string = mandoc_strndup(p + startpos, endpos - startpos);
17360e1e752SSascha Wildner
17454ba9607SSascha Wildner if (p[*pos] != '\0')
17580387638SSascha Wildner (*pos)++;
17680387638SSascha Wildner
17780387638SSascha Wildner if ( ! strcmp(dat->string, "_"))
17880387638SSascha Wildner dat->pos = TBL_DATA_HORIZ;
17980387638SSascha Wildner else if ( ! strcmp(dat->string, "="))
18080387638SSascha Wildner dat->pos = TBL_DATA_DHORIZ;
18180387638SSascha Wildner else if ( ! strcmp(dat->string, "\\_"))
18280387638SSascha Wildner dat->pos = TBL_DATA_NHORIZ;
18380387638SSascha Wildner else if ( ! strcmp(dat->string, "\\="))
18480387638SSascha Wildner dat->pos = TBL_DATA_NDHORIZ;
18580387638SSascha Wildner else
18680387638SSascha Wildner dat->pos = TBL_DATA_DATA;
18780387638SSascha Wildner
18854ba9607SSascha Wildner if ((dat->layout->pos == TBL_CELL_HORIZ ||
18954ba9607SSascha Wildner dat->layout->pos == TBL_CELL_DHORIZ ||
19054ba9607SSascha Wildner dat->layout->pos == TBL_CELL_DOWN) &&
19154ba9607SSascha Wildner dat->pos == TBL_DATA_DATA && *dat->string != '\0')
19254ba9607SSascha Wildner mandoc_msg(MANDOCERR_TBLDATA_SPAN,
193*99db7d0eSSascha Wildner ln, startpos, "%s", dat->string);
19480387638SSascha Wildner }
19580387638SSascha Wildner
19654ba9607SSascha Wildner void
tbl_cdata(struct tbl_node * tbl,int ln,const char * p,int pos)19754ba9607SSascha Wildner tbl_cdata(struct tbl_node *tbl, int ln, const char *p, int pos)
19880387638SSascha Wildner {
19980387638SSascha Wildner struct tbl_dat *dat;
20080387638SSascha Wildner size_t sz;
20180387638SSascha Wildner
20280387638SSascha Wildner dat = tbl->last_span->last;
20380387638SSascha Wildner
20480387638SSascha Wildner if (p[pos] == 'T' && p[pos + 1] == '}') {
20580387638SSascha Wildner pos += 2;
206*99db7d0eSSascha Wildner if (tbl->opts.opts & TBL_OPT_NOSPACE)
207*99db7d0eSSascha Wildner while (p[pos] == ' ')
208*99db7d0eSSascha Wildner pos++;
20980387638SSascha Wildner if (p[pos] == tbl->opts.tab) {
21080387638SSascha Wildner tbl->part = TBL_PART_DATA;
21180387638SSascha Wildner pos++;
21254ba9607SSascha Wildner while (p[pos] != '\0')
21354ba9607SSascha Wildner getdata(tbl, tbl->last_span, ln, p, &pos);
21454ba9607SSascha Wildner return;
21554ba9607SSascha Wildner } else if (p[pos] == '\0') {
21680387638SSascha Wildner tbl->part = TBL_PART_DATA;
21754ba9607SSascha Wildner return;
21880387638SSascha Wildner }
21980387638SSascha Wildner
22080387638SSascha Wildner /* Fallthrough: T} is part of a word. */
22180387638SSascha Wildner }
22280387638SSascha Wildner
22360e1e752SSascha Wildner dat->pos = TBL_DATA_DATA;
22454ba9607SSascha Wildner dat->block = 1;
22560e1e752SSascha Wildner
22654ba9607SSascha Wildner if (dat->string != NULL) {
22754ba9607SSascha Wildner sz = strlen(p + pos) + strlen(dat->string) + 2;
22880387638SSascha Wildner dat->string = mandoc_realloc(dat->string, sz);
229070c62a6SFranco Fichtner (void)strlcat(dat->string, " ", sz);
23054ba9607SSascha Wildner (void)strlcat(dat->string, p + pos, sz);
23180387638SSascha Wildner } else
23254ba9607SSascha Wildner dat->string = mandoc_strdup(p + pos);
23380387638SSascha Wildner
23454ba9607SSascha Wildner if (dat->layout->pos == TBL_CELL_DOWN)
23554ba9607SSascha Wildner mandoc_msg(MANDOCERR_TBLDATA_SPAN,
23654ba9607SSascha Wildner ln, pos, "%s", dat->string);
23780387638SSascha Wildner }
23880387638SSascha Wildner
23960e1e752SSascha Wildner static struct tbl_span *
newspan(struct tbl_node * tbl,int line,struct tbl_row * rp)24060e1e752SSascha Wildner newspan(struct tbl_node *tbl, int line, struct tbl_row *rp)
24160e1e752SSascha Wildner {
24260e1e752SSascha Wildner struct tbl_span *dp;
24360e1e752SSascha Wildner
24454ba9607SSascha Wildner dp = mandoc_calloc(1, sizeof(*dp));
24560e1e752SSascha Wildner dp->line = line;
246f88b6c16SFranco Fichtner dp->opts = &tbl->opts;
24760e1e752SSascha Wildner dp->layout = rp;
24854ba9607SSascha Wildner dp->prev = tbl->last_span;
24960e1e752SSascha Wildner
25054ba9607SSascha Wildner if (dp->prev == NULL) {
25154ba9607SSascha Wildner tbl->first_span = dp;
25260e1e752SSascha Wildner tbl->current_span = NULL;
25354ba9607SSascha Wildner } else
25454ba9607SSascha Wildner dp->prev->next = dp;
25554ba9607SSascha Wildner tbl->last_span = dp;
25654ba9607SSascha Wildner
25754ba9607SSascha Wildner return dp;
25860e1e752SSascha Wildner }
25960e1e752SSascha Wildner
26054ba9607SSascha Wildner void
tbl_data(struct tbl_node * tbl,int ln,const char * p,int pos)26154ba9607SSascha Wildner tbl_data(struct tbl_node *tbl, int ln, const char *p, int pos)
26280387638SSascha Wildner {
26380387638SSascha Wildner struct tbl_row *rp;
26454ba9607SSascha Wildner struct tbl_cell *cp;
26554ba9607SSascha Wildner struct tbl_span *sp;
26680387638SSascha Wildner
267*99db7d0eSSascha Wildner for (sp = tbl->last_span; sp != NULL; sp = sp->prev)
268*99db7d0eSSascha Wildner if (sp->pos == TBL_SPAN_DATA)
269*99db7d0eSSascha Wildner break;
270*99db7d0eSSascha Wildner rp = sp == NULL ? tbl->first_row :
271*99db7d0eSSascha Wildner sp->layout->next == NULL ? sp->layout : sp->layout->next;
27254ba9607SSascha Wildner assert(rp != NULL);
27380387638SSascha Wildner
27454ba9607SSascha Wildner if (p[1] == '\0') {
27554ba9607SSascha Wildner switch (p[0]) {
27654ba9607SSascha Wildner case '.':
27780387638SSascha Wildner /*
27854ba9607SSascha Wildner * Empty request lines must be handled here
27954ba9607SSascha Wildner * and cannot be discarded in roff_parseln()
28054ba9607SSascha Wildner * because in the layout section, they
28154ba9607SSascha Wildner * are significant and end the layout.
28280387638SSascha Wildner */
28354ba9607SSascha Wildner return;
28454ba9607SSascha Wildner case '_':
28554ba9607SSascha Wildner sp = newspan(tbl, ln, rp);
28654ba9607SSascha Wildner sp->pos = TBL_SPAN_HORIZ;
28754ba9607SSascha Wildner return;
28854ba9607SSascha Wildner case '=':
28954ba9607SSascha Wildner sp = newspan(tbl, ln, rp);
29054ba9607SSascha Wildner sp->pos = TBL_SPAN_DHORIZ;
29154ba9607SSascha Wildner return;
29260e1e752SSascha Wildner default:
29360e1e752SSascha Wildner break;
29460e1e752SSascha Wildner }
29554ba9607SSascha Wildner }
29654ba9607SSascha Wildner
29754ba9607SSascha Wildner /*
29854ba9607SSascha Wildner * If the layout row contains nothing but horizontal lines,
29954ba9607SSascha Wildner * allocate an empty span for it and assign the current span
30054ba9607SSascha Wildner * to the next layout row accepting data.
30154ba9607SSascha Wildner */
30254ba9607SSascha Wildner
30354ba9607SSascha Wildner while (rp->next != NULL) {
30454ba9607SSascha Wildner if (rp->last->col + 1 < tbl->opts.cols)
30560e1e752SSascha Wildner break;
30654ba9607SSascha Wildner for (cp = rp->first; cp != NULL; cp = cp->next)
30754ba9607SSascha Wildner if (cp->pos != TBL_CELL_HORIZ &&
30854ba9607SSascha Wildner cp->pos != TBL_CELL_DHORIZ)
30954ba9607SSascha Wildner break;
31054ba9607SSascha Wildner if (cp != NULL)
31154ba9607SSascha Wildner break;
31254ba9607SSascha Wildner sp = newspan(tbl, ln, rp);
31354ba9607SSascha Wildner sp->pos = TBL_SPAN_DATA;
31454ba9607SSascha Wildner rp = rp->next;
31580387638SSascha Wildner }
31680387638SSascha Wildner
31754ba9607SSascha Wildner /* Process a real data row. */
31880387638SSascha Wildner
31954ba9607SSascha Wildner sp = newspan(tbl, ln, rp);
32054ba9607SSascha Wildner sp->pos = TBL_SPAN_DATA;
32154ba9607SSascha Wildner while (p[pos] != '\0')
32254ba9607SSascha Wildner getdata(tbl, sp, ln, p, &pos);
32380387638SSascha Wildner }
324