/* $Id: tag.c,v 1.36 2020/04/19 16:36:16 schwarze Exp $ */
/*
 * Copyright (c) 2015,2016,2018,2019,2020 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Functions to tag syntax tree nodes.
 * For internal use by mandoc(1) validation modules only.
 */
2054ba9607SSascha Wildner #include "config.h"
2154ba9607SSascha Wildner
2254ba9607SSascha Wildner #include <sys/types.h>
2354ba9607SSascha Wildner
24*99db7d0eSSascha Wildner #include <assert.h>
2554ba9607SSascha Wildner #include <limits.h>
2654ba9607SSascha Wildner #include <stddef.h>
2754ba9607SSascha Wildner #include <stdint.h>
2854ba9607SSascha Wildner #include <stdlib.h>
2954ba9607SSascha Wildner #include <string.h>
3054ba9607SSascha Wildner
3154ba9607SSascha Wildner #include "mandoc_aux.h"
3254ba9607SSascha Wildner #include "mandoc_ohash.h"
33*99db7d0eSSascha Wildner #include "roff.h"
34*99db7d0eSSascha Wildner #include "mdoc.h"
35*99db7d0eSSascha Wildner #include "roff_int.h"
3654ba9607SSascha Wildner #include "tag.h"
3754ba9607SSascha Wildner
/*
 * One entry of the tag hash table: a tag string together with
 * the syntax tree nodes currently registered for it.
 */
struct tag_entry {
	struct roff_node **nodes;	/* Array of nodes registered for s. */
	size_t	 maxnodes;	/* Number of slots allocated in nodes. */
	size_t	 nnodes;	/* Number of slots in use in nodes. */
	int	 prio;		/* Priority; lower numbers take precedence. */
	char	 s[];		/* NUL-terminated tag string; the hash key. */
};
4554ba9607SSascha Wildner
/* Helpers called by tag_postprocess() to relocate permalinks and ids. */
static void	 tag_move_href(struct roff_man *,
			struct roff_node *, const char *);
static void	 tag_move_id(struct roff_node *);

/* Hash table of struct tag_entry objects, keyed by their s member. */
static struct ohash	 tag_data;
5154ba9607SSascha Wildner
5254ba9607SSascha Wildner
5354ba9607SSascha Wildner /*
54*99db7d0eSSascha Wildner * Set up the ohash table to collect nodes
5554ba9607SSascha Wildner * where various marked-up terms are documented.
5654ba9607SSascha Wildner */
57*99db7d0eSSascha Wildner void
tag_alloc(void)58*99db7d0eSSascha Wildner tag_alloc(void)
59*99db7d0eSSascha Wildner {
6054ba9607SSascha Wildner mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s));
6154ba9607SSascha Wildner }
6254ba9607SSascha Wildner
6354ba9607SSascha Wildner void
tag_free(void)64*99db7d0eSSascha Wildner tag_free(void)
6554ba9607SSascha Wildner {
6654ba9607SSascha Wildner struct tag_entry *entry;
6754ba9607SSascha Wildner unsigned int slot;
6854ba9607SSascha Wildner
69*99db7d0eSSascha Wildner if (tag_data.info.free == NULL)
7054ba9607SSascha Wildner return;
7154ba9607SSascha Wildner entry = ohash_first(&tag_data, &slot);
7254ba9607SSascha Wildner while (entry != NULL) {
73*99db7d0eSSascha Wildner free(entry->nodes);
7454ba9607SSascha Wildner free(entry);
7554ba9607SSascha Wildner entry = ohash_next(&tag_data, &slot);
7654ba9607SSascha Wildner }
7754ba9607SSascha Wildner ohash_delete(&tag_data);
78*99db7d0eSSascha Wildner tag_data.info.free = NULL;
7954ba9607SSascha Wildner }
8054ba9607SSascha Wildner
81*99db7d0eSSascha Wildner /*
82*99db7d0eSSascha Wildner * Set a node where a term is defined,
83*99db7d0eSSascha Wildner * unless it is already defined at a lower priority.
84*99db7d0eSSascha Wildner */
8554ba9607SSascha Wildner void
tag_put(const char * s,int prio,struct roff_node * n)86*99db7d0eSSascha Wildner tag_put(const char *s, int prio, struct roff_node *n)
8754ba9607SSascha Wildner {
88*99db7d0eSSascha Wildner struct tag_entry *entry;
89*99db7d0eSSascha Wildner struct roff_node *nold;
90*99db7d0eSSascha Wildner const char *se;
91*99db7d0eSSascha Wildner size_t len;
92*99db7d0eSSascha Wildner unsigned int slot;
9354ba9607SSascha Wildner
94*99db7d0eSSascha Wildner assert(prio <= TAG_FALLBACK);
95*99db7d0eSSascha Wildner
96*99db7d0eSSascha Wildner if (s == NULL) {
97*99db7d0eSSascha Wildner if (n->child == NULL || n->child->type != ROFFT_TEXT)
98*99db7d0eSSascha Wildner return;
99*99db7d0eSSascha Wildner s = n->child->string;
100*99db7d0eSSascha Wildner switch (s[0]) {
101*99db7d0eSSascha Wildner case '-':
102*99db7d0eSSascha Wildner s++;
103*99db7d0eSSascha Wildner break;
104*99db7d0eSSascha Wildner case '\\':
105*99db7d0eSSascha Wildner switch (s[1]) {
106*99db7d0eSSascha Wildner case '&':
107*99db7d0eSSascha Wildner case '-':
108*99db7d0eSSascha Wildner case 'e':
109*99db7d0eSSascha Wildner s += 2;
110*99db7d0eSSascha Wildner break;
111*99db7d0eSSascha Wildner default:
112*99db7d0eSSascha Wildner break;
11354ba9607SSascha Wildner }
114*99db7d0eSSascha Wildner break;
115*99db7d0eSSascha Wildner default:
116*99db7d0eSSascha Wildner break;
117*99db7d0eSSascha Wildner }
11854ba9607SSascha Wildner }
11954ba9607SSascha Wildner
120*99db7d0eSSascha Wildner /*
121*99db7d0eSSascha Wildner * Skip whitespace and escapes and whatever follows,
122*99db7d0eSSascha Wildner * and if there is any, downgrade the priority.
123*99db7d0eSSascha Wildner */
124*99db7d0eSSascha Wildner
125*99db7d0eSSascha Wildner len = strcspn(s, " \t\\");
126*99db7d0eSSascha Wildner if (len == 0)
127*99db7d0eSSascha Wildner return;
128*99db7d0eSSascha Wildner
129*99db7d0eSSascha Wildner se = s + len;
130*99db7d0eSSascha Wildner if (*se != '\0' && prio < TAG_WEAK)
131*99db7d0eSSascha Wildner prio = TAG_WEAK;
132*99db7d0eSSascha Wildner
133*99db7d0eSSascha Wildner slot = ohash_qlookupi(&tag_data, s, &se);
134*99db7d0eSSascha Wildner entry = ohash_find(&tag_data, slot);
135*99db7d0eSSascha Wildner
136*99db7d0eSSascha Wildner /* Build a new entry. */
137*99db7d0eSSascha Wildner
138*99db7d0eSSascha Wildner if (entry == NULL) {
139*99db7d0eSSascha Wildner entry = mandoc_malloc(sizeof(*entry) + len + 1);
140*99db7d0eSSascha Wildner memcpy(entry->s, s, len);
141*99db7d0eSSascha Wildner entry->s[len] = '\0';
142*99db7d0eSSascha Wildner entry->nodes = NULL;
143*99db7d0eSSascha Wildner entry->maxnodes = entry->nnodes = 0;
144*99db7d0eSSascha Wildner ohash_insert(&tag_data, slot, entry);
145*99db7d0eSSascha Wildner }
146*99db7d0eSSascha Wildner
147*99db7d0eSSascha Wildner /*
148*99db7d0eSSascha Wildner * Lower priority numbers take precedence.
149*99db7d0eSSascha Wildner * If a better entry is already present, ignore the new one.
150*99db7d0eSSascha Wildner */
151*99db7d0eSSascha Wildner
152*99db7d0eSSascha Wildner else if (entry->prio < prio)
153*99db7d0eSSascha Wildner return;
154*99db7d0eSSascha Wildner
155*99db7d0eSSascha Wildner /*
156*99db7d0eSSascha Wildner * If the existing entry is worse, clear it.
157*99db7d0eSSascha Wildner * In addition, a tag with priority TAG_FALLBACK
158*99db7d0eSSascha Wildner * is only used if the tag occurs exactly once.
159*99db7d0eSSascha Wildner */
160*99db7d0eSSascha Wildner
161*99db7d0eSSascha Wildner else if (entry->prio > prio || prio == TAG_FALLBACK) {
162*99db7d0eSSascha Wildner while (entry->nnodes > 0) {
163*99db7d0eSSascha Wildner nold = entry->nodes[--entry->nnodes];
164*99db7d0eSSascha Wildner nold->flags &= ~NODE_ID;
165*99db7d0eSSascha Wildner free(nold->tag);
166*99db7d0eSSascha Wildner nold->tag = NULL;
167*99db7d0eSSascha Wildner }
168*99db7d0eSSascha Wildner if (prio == TAG_FALLBACK) {
169*99db7d0eSSascha Wildner entry->prio = TAG_DELETE;
170*99db7d0eSSascha Wildner return;
171*99db7d0eSSascha Wildner }
172*99db7d0eSSascha Wildner }
173*99db7d0eSSascha Wildner
174*99db7d0eSSascha Wildner /* Remember the new node. */
175*99db7d0eSSascha Wildner
176*99db7d0eSSascha Wildner if (entry->maxnodes == entry->nnodes) {
177*99db7d0eSSascha Wildner entry->maxnodes += 4;
178*99db7d0eSSascha Wildner entry->nodes = mandoc_reallocarray(entry->nodes,
179*99db7d0eSSascha Wildner entry->maxnodes, sizeof(*entry->nodes));
180*99db7d0eSSascha Wildner }
181*99db7d0eSSascha Wildner entry->nodes[entry->nnodes++] = n;
182*99db7d0eSSascha Wildner entry->prio = prio;
183*99db7d0eSSascha Wildner n->flags |= NODE_ID;
184*99db7d0eSSascha Wildner if (n->child == NULL || n->child->string != s || *se != '\0') {
185*99db7d0eSSascha Wildner assert(n->tag == NULL);
186*99db7d0eSSascha Wildner n->tag = mandoc_strndup(s, len);
187*99db7d0eSSascha Wildner }
188*99db7d0eSSascha Wildner }
189*99db7d0eSSascha Wildner
190*99db7d0eSSascha Wildner int
tag_exists(const char * tag)191*99db7d0eSSascha Wildner tag_exists(const char *tag)
192*99db7d0eSSascha Wildner {
193*99db7d0eSSascha Wildner return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL;
194*99db7d0eSSascha Wildner }
195*99db7d0eSSascha Wildner
196*99db7d0eSSascha Wildner /*
197*99db7d0eSSascha Wildner * For in-line elements, move the link target
198*99db7d0eSSascha Wildner * to the enclosing paragraph when appropriate.
199*99db7d0eSSascha Wildner */
20054ba9607SSascha Wildner static void
tag_move_id(struct roff_node * n)201*99db7d0eSSascha Wildner tag_move_id(struct roff_node *n)
20254ba9607SSascha Wildner {
203*99db7d0eSSascha Wildner struct roff_node *np;
20454ba9607SSascha Wildner
205*99db7d0eSSascha Wildner np = n;
206*99db7d0eSSascha Wildner for (;;) {
207*99db7d0eSSascha Wildner if (np->prev != NULL)
208*99db7d0eSSascha Wildner np = np->prev;
209*99db7d0eSSascha Wildner else if ((np = np->parent) == NULL)
210*99db7d0eSSascha Wildner return;
211*99db7d0eSSascha Wildner switch (np->tok) {
212*99db7d0eSSascha Wildner case MDOC_It:
213*99db7d0eSSascha Wildner switch (np->parent->parent->norm->Bl.type) {
214*99db7d0eSSascha Wildner case LIST_column:
215*99db7d0eSSascha Wildner /* Target the ROFFT_BLOCK = <tr>. */
216*99db7d0eSSascha Wildner np = np->parent;
217*99db7d0eSSascha Wildner break;
218*99db7d0eSSascha Wildner case LIST_diag:
219*99db7d0eSSascha Wildner case LIST_hang:
220*99db7d0eSSascha Wildner case LIST_inset:
221*99db7d0eSSascha Wildner case LIST_ohang:
222*99db7d0eSSascha Wildner case LIST_tag:
223*99db7d0eSSascha Wildner /* Target the ROFFT_HEAD = <dt>. */
224*99db7d0eSSascha Wildner np = np->parent->head;
225*99db7d0eSSascha Wildner break;
226*99db7d0eSSascha Wildner default:
227*99db7d0eSSascha Wildner /* Target the ROFF_BODY = <li>. */
228*99db7d0eSSascha Wildner break;
229*99db7d0eSSascha Wildner }
230*99db7d0eSSascha Wildner /* FALLTHROUGH */
231*99db7d0eSSascha Wildner case MDOC_Pp: /* Target the ROFFT_ELEM = <p>. */
232*99db7d0eSSascha Wildner if (np->tag == NULL) {
233*99db7d0eSSascha Wildner np->tag = mandoc_strdup(n->tag == NULL ?
234*99db7d0eSSascha Wildner n->child->string : n->tag);
235*99db7d0eSSascha Wildner np->flags |= NODE_ID;
236*99db7d0eSSascha Wildner n->flags &= ~NODE_ID;
237*99db7d0eSSascha Wildner }
238*99db7d0eSSascha Wildner return;
239*99db7d0eSSascha Wildner case MDOC_Sh:
240*99db7d0eSSascha Wildner case MDOC_Ss:
241*99db7d0eSSascha Wildner case MDOC_Bd:
242*99db7d0eSSascha Wildner case MDOC_Bl:
243*99db7d0eSSascha Wildner case MDOC_D1:
244*99db7d0eSSascha Wildner case MDOC_Dl:
245*99db7d0eSSascha Wildner case MDOC_Rs:
246*99db7d0eSSascha Wildner /* Do not move past major blocks. */
247*99db7d0eSSascha Wildner return;
248*99db7d0eSSascha Wildner default:
249*99db7d0eSSascha Wildner /*
250*99db7d0eSSascha Wildner * Move past in-line content and partial
251*99db7d0eSSascha Wildner * blocks, for example .It Xo or .It Bq Er.
252*99db7d0eSSascha Wildner */
253*99db7d0eSSascha Wildner break;
254*99db7d0eSSascha Wildner }
255*99db7d0eSSascha Wildner }
256*99db7d0eSSascha Wildner }
257*99db7d0eSSascha Wildner
258*99db7d0eSSascha Wildner /*
259*99db7d0eSSascha Wildner * When a paragraph is tagged and starts with text,
260*99db7d0eSSascha Wildner * move the permalink to the first few words.
261*99db7d0eSSascha Wildner */
262*99db7d0eSSascha Wildner static void
tag_move_href(struct roff_man * man,struct roff_node * n,const char * tag)263*99db7d0eSSascha Wildner tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
264*99db7d0eSSascha Wildner {
265*99db7d0eSSascha Wildner char *cp;
266*99db7d0eSSascha Wildner
267*99db7d0eSSascha Wildner if (n == NULL || n->type != ROFFT_TEXT ||
268*99db7d0eSSascha Wildner *n->string == '\0' || *n->string == ' ')
269*99db7d0eSSascha Wildner return;
270*99db7d0eSSascha Wildner
271*99db7d0eSSascha Wildner cp = n->string;
272*99db7d0eSSascha Wildner while (cp != NULL && cp - n->string < 5)
273*99db7d0eSSascha Wildner cp = strchr(cp + 1, ' ');
274*99db7d0eSSascha Wildner
275*99db7d0eSSascha Wildner /* If the first text node is longer, split it. */
276*99db7d0eSSascha Wildner
277*99db7d0eSSascha Wildner if (cp != NULL && cp[1] != '\0') {
278*99db7d0eSSascha Wildner man->last = n;
279*99db7d0eSSascha Wildner man->next = ROFF_NEXT_SIBLING;
280*99db7d0eSSascha Wildner roff_word_alloc(man, n->line,
281*99db7d0eSSascha Wildner n->pos + (cp - n->string), cp + 1);
282*99db7d0eSSascha Wildner man->last->flags = n->flags & ~NODE_LINE;
283*99db7d0eSSascha Wildner *cp = '\0';
284*99db7d0eSSascha Wildner }
285*99db7d0eSSascha Wildner
286*99db7d0eSSascha Wildner assert(n->tag == NULL);
287*99db7d0eSSascha Wildner n->tag = mandoc_strdup(tag);
288*99db7d0eSSascha Wildner n->flags |= NODE_HREF;
289*99db7d0eSSascha Wildner }
290*99db7d0eSSascha Wildner
291*99db7d0eSSascha Wildner /*
292*99db7d0eSSascha Wildner * When all tags have been set, decide where to put
293*99db7d0eSSascha Wildner * the associated permalinks, and maybe move some tags
294*99db7d0eSSascha Wildner * to the beginning of the respective paragraphs.
295*99db7d0eSSascha Wildner */
296*99db7d0eSSascha Wildner void
tag_postprocess(struct roff_man * man,struct roff_node * n)297*99db7d0eSSascha Wildner tag_postprocess(struct roff_man *man, struct roff_node *n)
298*99db7d0eSSascha Wildner {
299*99db7d0eSSascha Wildner if (n->flags & NODE_ID) {
300*99db7d0eSSascha Wildner switch (n->tok) {
301*99db7d0eSSascha Wildner case MDOC_Pp:
302*99db7d0eSSascha Wildner tag_move_href(man, n->next, n->tag);
303*99db7d0eSSascha Wildner break;
304*99db7d0eSSascha Wildner case MDOC_Bd:
305*99db7d0eSSascha Wildner case MDOC_D1:
306*99db7d0eSSascha Wildner case MDOC_Dl:
307*99db7d0eSSascha Wildner tag_move_href(man, n->child, n->tag);
308*99db7d0eSSascha Wildner break;
309*99db7d0eSSascha Wildner case MDOC_Bl:
310*99db7d0eSSascha Wildner /* XXX No permalink for now. */
311*99db7d0eSSascha Wildner break;
312*99db7d0eSSascha Wildner default:
313*99db7d0eSSascha Wildner if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
314*99db7d0eSSascha Wildner tag_move_id(n);
315*99db7d0eSSascha Wildner if (n->tok != MDOC_Tg)
316*99db7d0eSSascha Wildner n->flags |= NODE_HREF;
317*99db7d0eSSascha Wildner else if ((n->flags & NODE_ID) == 0) {
318*99db7d0eSSascha Wildner n->flags |= NODE_NOPRT;
319*99db7d0eSSascha Wildner free(n->tag);
320*99db7d0eSSascha Wildner n->tag = NULL;
321*99db7d0eSSascha Wildner }
322*99db7d0eSSascha Wildner break;
323*99db7d0eSSascha Wildner }
324*99db7d0eSSascha Wildner }
325*99db7d0eSSascha Wildner for (n = n->child; n != NULL; n = n->next)
326*99db7d0eSSascha Wildner tag_postprocess(man, n);
32754ba9607SSascha Wildner }
328