xref: /dflybsd-src/contrib/mdocml/tag.c (revision 1e4d43f9c96723e4e55543d240f182e1aac9a4c2)
/* $Id: tag.c,v 1.36 2020/04/19 16:36:16 schwarze Exp $ */
/*
 * Copyright (c) 2015,2016,2018,2019,2020 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Functions to tag syntax tree nodes.
 * For internal use by mandoc(1) validation modules only.
 */
2054ba9607SSascha Wildner #include "config.h"
2154ba9607SSascha Wildner 
2254ba9607SSascha Wildner #include <sys/types.h>
2354ba9607SSascha Wildner 
24*99db7d0eSSascha Wildner #include <assert.h>
2554ba9607SSascha Wildner #include <limits.h>
2654ba9607SSascha Wildner #include <stddef.h>
2754ba9607SSascha Wildner #include <stdint.h>
2854ba9607SSascha Wildner #include <stdlib.h>
2954ba9607SSascha Wildner #include <string.h>
3054ba9607SSascha Wildner 
3154ba9607SSascha Wildner #include "mandoc_aux.h"
3254ba9607SSascha Wildner #include "mandoc_ohash.h"
33*99db7d0eSSascha Wildner #include "roff.h"
34*99db7d0eSSascha Wildner #include "mdoc.h"
35*99db7d0eSSascha Wildner #include "roff_int.h"
3654ba9607SSascha Wildner #include "tag.h"
3754ba9607SSascha Wildner 
/*
 * One hash table entry per tag name.
 * The name is kept in the trailing flexible array member;
 * its offset is what tag_alloc() hands to mandoc_ohash_init().
 */
struct tag_entry {
	struct roff_node **nodes;	/* nodes currently carrying the tag */
	size_t	 maxnodes;		/* slots allocated in nodes[] */
	size_t	 nnodes;		/* slots in use in nodes[] */
	int	 prio;			/* priority of the recorded nodes */
	char	 s[];			/* NUL-terminated tag name */
};
4554ba9607SSascha Wildner 
46*99db7d0eSSascha Wildner static void		 tag_move_href(struct roff_man *,
47*99db7d0eSSascha Wildner 				struct roff_node *, const char *);
48*99db7d0eSSascha Wildner static void		 tag_move_id(struct roff_node *);
4954ba9607SSascha Wildner 
5054ba9607SSascha Wildner static struct ohash	 tag_data;
5154ba9607SSascha Wildner 
5254ba9607SSascha Wildner 
5354ba9607SSascha Wildner /*
54*99db7d0eSSascha Wildner  * Set up the ohash table to collect nodes
5554ba9607SSascha Wildner  * where various marked-up terms are documented.
5654ba9607SSascha Wildner  */
57*99db7d0eSSascha Wildner void
tag_alloc(void)58*99db7d0eSSascha Wildner tag_alloc(void)
59*99db7d0eSSascha Wildner {
6054ba9607SSascha Wildner 	mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s));
6154ba9607SSascha Wildner }
6254ba9607SSascha Wildner 
6354ba9607SSascha Wildner void
tag_free(void)64*99db7d0eSSascha Wildner tag_free(void)
6554ba9607SSascha Wildner {
6654ba9607SSascha Wildner 	struct tag_entry	*entry;
6754ba9607SSascha Wildner 	unsigned int		 slot;
6854ba9607SSascha Wildner 
69*99db7d0eSSascha Wildner 	if (tag_data.info.free == NULL)
7054ba9607SSascha Wildner 		return;
7154ba9607SSascha Wildner 	entry = ohash_first(&tag_data, &slot);
7254ba9607SSascha Wildner 	while (entry != NULL) {
73*99db7d0eSSascha Wildner 		free(entry->nodes);
7454ba9607SSascha Wildner 		free(entry);
7554ba9607SSascha Wildner 		entry = ohash_next(&tag_data, &slot);
7654ba9607SSascha Wildner 	}
7754ba9607SSascha Wildner 	ohash_delete(&tag_data);
78*99db7d0eSSascha Wildner 	tag_data.info.free = NULL;
7954ba9607SSascha Wildner }
8054ba9607SSascha Wildner 
81*99db7d0eSSascha Wildner /*
82*99db7d0eSSascha Wildner  * Set a node where a term is defined,
83*99db7d0eSSascha Wildner  * unless it is already defined at a lower priority.
84*99db7d0eSSascha Wildner  */
8554ba9607SSascha Wildner void
tag_put(const char * s,int prio,struct roff_node * n)86*99db7d0eSSascha Wildner tag_put(const char *s, int prio, struct roff_node *n)
8754ba9607SSascha Wildner {
88*99db7d0eSSascha Wildner 	struct tag_entry	*entry;
89*99db7d0eSSascha Wildner 	struct roff_node	*nold;
90*99db7d0eSSascha Wildner 	const char		*se;
91*99db7d0eSSascha Wildner 	size_t			 len;
92*99db7d0eSSascha Wildner 	unsigned int		 slot;
9354ba9607SSascha Wildner 
94*99db7d0eSSascha Wildner 	assert(prio <= TAG_FALLBACK);
95*99db7d0eSSascha Wildner 
96*99db7d0eSSascha Wildner 	if (s == NULL) {
97*99db7d0eSSascha Wildner 		if (n->child == NULL || n->child->type != ROFFT_TEXT)
98*99db7d0eSSascha Wildner 			return;
99*99db7d0eSSascha Wildner 		s = n->child->string;
100*99db7d0eSSascha Wildner 		switch (s[0]) {
101*99db7d0eSSascha Wildner 		case '-':
102*99db7d0eSSascha Wildner 			s++;
103*99db7d0eSSascha Wildner 			break;
104*99db7d0eSSascha Wildner 		case '\\':
105*99db7d0eSSascha Wildner 			switch (s[1]) {
106*99db7d0eSSascha Wildner 			case '&':
107*99db7d0eSSascha Wildner 			case '-':
108*99db7d0eSSascha Wildner 			case 'e':
109*99db7d0eSSascha Wildner 				s += 2;
110*99db7d0eSSascha Wildner 				break;
111*99db7d0eSSascha Wildner 			default:
112*99db7d0eSSascha Wildner 				break;
11354ba9607SSascha Wildner 			}
114*99db7d0eSSascha Wildner 			break;
115*99db7d0eSSascha Wildner 		default:
116*99db7d0eSSascha Wildner 			break;
117*99db7d0eSSascha Wildner 		}
11854ba9607SSascha Wildner 	}
11954ba9607SSascha Wildner 
120*99db7d0eSSascha Wildner 	/*
121*99db7d0eSSascha Wildner 	 * Skip whitespace and escapes and whatever follows,
122*99db7d0eSSascha Wildner 	 * and if there is any, downgrade the priority.
123*99db7d0eSSascha Wildner 	 */
124*99db7d0eSSascha Wildner 
125*99db7d0eSSascha Wildner 	len = strcspn(s, " \t\\");
126*99db7d0eSSascha Wildner 	if (len == 0)
127*99db7d0eSSascha Wildner 		return;
128*99db7d0eSSascha Wildner 
129*99db7d0eSSascha Wildner 	se = s + len;
130*99db7d0eSSascha Wildner 	if (*se != '\0' && prio < TAG_WEAK)
131*99db7d0eSSascha Wildner 		prio = TAG_WEAK;
132*99db7d0eSSascha Wildner 
133*99db7d0eSSascha Wildner 	slot = ohash_qlookupi(&tag_data, s, &se);
134*99db7d0eSSascha Wildner 	entry = ohash_find(&tag_data, slot);
135*99db7d0eSSascha Wildner 
136*99db7d0eSSascha Wildner 	/* Build a new entry. */
137*99db7d0eSSascha Wildner 
138*99db7d0eSSascha Wildner 	if (entry == NULL) {
139*99db7d0eSSascha Wildner 		entry = mandoc_malloc(sizeof(*entry) + len + 1);
140*99db7d0eSSascha Wildner 		memcpy(entry->s, s, len);
141*99db7d0eSSascha Wildner 		entry->s[len] = '\0';
142*99db7d0eSSascha Wildner 		entry->nodes = NULL;
143*99db7d0eSSascha Wildner 		entry->maxnodes = entry->nnodes = 0;
144*99db7d0eSSascha Wildner 		ohash_insert(&tag_data, slot, entry);
145*99db7d0eSSascha Wildner 	}
146*99db7d0eSSascha Wildner 
147*99db7d0eSSascha Wildner 	/*
148*99db7d0eSSascha Wildner 	 * Lower priority numbers take precedence.
149*99db7d0eSSascha Wildner 	 * If a better entry is already present, ignore the new one.
150*99db7d0eSSascha Wildner 	 */
151*99db7d0eSSascha Wildner 
152*99db7d0eSSascha Wildner 	else if (entry->prio < prio)
153*99db7d0eSSascha Wildner 			return;
154*99db7d0eSSascha Wildner 
155*99db7d0eSSascha Wildner 	/*
156*99db7d0eSSascha Wildner 	 * If the existing entry is worse, clear it.
157*99db7d0eSSascha Wildner 	 * In addition, a tag with priority TAG_FALLBACK
158*99db7d0eSSascha Wildner 	 * is only used if the tag occurs exactly once.
159*99db7d0eSSascha Wildner 	 */
160*99db7d0eSSascha Wildner 
161*99db7d0eSSascha Wildner 	else if (entry->prio > prio || prio == TAG_FALLBACK) {
162*99db7d0eSSascha Wildner 		while (entry->nnodes > 0) {
163*99db7d0eSSascha Wildner 			nold = entry->nodes[--entry->nnodes];
164*99db7d0eSSascha Wildner 			nold->flags &= ~NODE_ID;
165*99db7d0eSSascha Wildner 			free(nold->tag);
166*99db7d0eSSascha Wildner 			nold->tag = NULL;
167*99db7d0eSSascha Wildner 		}
168*99db7d0eSSascha Wildner 		if (prio == TAG_FALLBACK) {
169*99db7d0eSSascha Wildner 			entry->prio = TAG_DELETE;
170*99db7d0eSSascha Wildner 			return;
171*99db7d0eSSascha Wildner 		}
172*99db7d0eSSascha Wildner 	}
173*99db7d0eSSascha Wildner 
174*99db7d0eSSascha Wildner 	/* Remember the new node. */
175*99db7d0eSSascha Wildner 
176*99db7d0eSSascha Wildner 	if (entry->maxnodes == entry->nnodes) {
177*99db7d0eSSascha Wildner 		entry->maxnodes += 4;
178*99db7d0eSSascha Wildner 		entry->nodes = mandoc_reallocarray(entry->nodes,
179*99db7d0eSSascha Wildner 		    entry->maxnodes, sizeof(*entry->nodes));
180*99db7d0eSSascha Wildner 	}
181*99db7d0eSSascha Wildner 	entry->nodes[entry->nnodes++] = n;
182*99db7d0eSSascha Wildner 	entry->prio = prio;
183*99db7d0eSSascha Wildner 	n->flags |= NODE_ID;
184*99db7d0eSSascha Wildner 	if (n->child == NULL || n->child->string != s || *se != '\0') {
185*99db7d0eSSascha Wildner 		assert(n->tag == NULL);
186*99db7d0eSSascha Wildner 		n->tag = mandoc_strndup(s, len);
187*99db7d0eSSascha Wildner 	}
188*99db7d0eSSascha Wildner }
189*99db7d0eSSascha Wildner 
190*99db7d0eSSascha Wildner int
tag_exists(const char * tag)191*99db7d0eSSascha Wildner tag_exists(const char *tag)
192*99db7d0eSSascha Wildner {
193*99db7d0eSSascha Wildner 	return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL;
194*99db7d0eSSascha Wildner }
195*99db7d0eSSascha Wildner 
196*99db7d0eSSascha Wildner /*
197*99db7d0eSSascha Wildner  * For in-line elements, move the link target
198*99db7d0eSSascha Wildner  * to the enclosing paragraph when appropriate.
199*99db7d0eSSascha Wildner  */
20054ba9607SSascha Wildner static void
tag_move_id(struct roff_node * n)201*99db7d0eSSascha Wildner tag_move_id(struct roff_node *n)
20254ba9607SSascha Wildner {
203*99db7d0eSSascha Wildner 	struct roff_node *np;
20454ba9607SSascha Wildner 
205*99db7d0eSSascha Wildner 	np = n;
206*99db7d0eSSascha Wildner 	for (;;) {
207*99db7d0eSSascha Wildner 		if (np->prev != NULL)
208*99db7d0eSSascha Wildner 			np = np->prev;
209*99db7d0eSSascha Wildner 		else if ((np = np->parent) == NULL)
210*99db7d0eSSascha Wildner 			return;
211*99db7d0eSSascha Wildner 		switch (np->tok) {
212*99db7d0eSSascha Wildner 		case MDOC_It:
213*99db7d0eSSascha Wildner 			switch (np->parent->parent->norm->Bl.type) {
214*99db7d0eSSascha Wildner 			case LIST_column:
215*99db7d0eSSascha Wildner 				/* Target the ROFFT_BLOCK = <tr>. */
216*99db7d0eSSascha Wildner 				np = np->parent;
217*99db7d0eSSascha Wildner 				break;
218*99db7d0eSSascha Wildner 			case LIST_diag:
219*99db7d0eSSascha Wildner 			case LIST_hang:
220*99db7d0eSSascha Wildner 			case LIST_inset:
221*99db7d0eSSascha Wildner 			case LIST_ohang:
222*99db7d0eSSascha Wildner 			case LIST_tag:
223*99db7d0eSSascha Wildner 				/* Target the ROFFT_HEAD = <dt>. */
224*99db7d0eSSascha Wildner 				np = np->parent->head;
225*99db7d0eSSascha Wildner 				break;
226*99db7d0eSSascha Wildner 			default:
227*99db7d0eSSascha Wildner 				/* Target the ROFF_BODY = <li>. */
228*99db7d0eSSascha Wildner 				break;
229*99db7d0eSSascha Wildner 			}
230*99db7d0eSSascha Wildner 			/* FALLTHROUGH */
231*99db7d0eSSascha Wildner 		case MDOC_Pp:	/* Target the ROFFT_ELEM = <p>. */
232*99db7d0eSSascha Wildner 			if (np->tag == NULL) {
233*99db7d0eSSascha Wildner 				np->tag = mandoc_strdup(n->tag == NULL ?
234*99db7d0eSSascha Wildner 				    n->child->string : n->tag);
235*99db7d0eSSascha Wildner 				np->flags |= NODE_ID;
236*99db7d0eSSascha Wildner 				n->flags &= ~NODE_ID;
237*99db7d0eSSascha Wildner 			}
238*99db7d0eSSascha Wildner 			return;
239*99db7d0eSSascha Wildner 		case MDOC_Sh:
240*99db7d0eSSascha Wildner 		case MDOC_Ss:
241*99db7d0eSSascha Wildner 		case MDOC_Bd:
242*99db7d0eSSascha Wildner 		case MDOC_Bl:
243*99db7d0eSSascha Wildner 		case MDOC_D1:
244*99db7d0eSSascha Wildner 		case MDOC_Dl:
245*99db7d0eSSascha Wildner 		case MDOC_Rs:
246*99db7d0eSSascha Wildner 			/* Do not move past major blocks. */
247*99db7d0eSSascha Wildner 			return;
248*99db7d0eSSascha Wildner 		default:
249*99db7d0eSSascha Wildner 			/*
250*99db7d0eSSascha Wildner 			 * Move past in-line content and partial
251*99db7d0eSSascha Wildner 			 * blocks, for example .It Xo or .It Bq Er.
252*99db7d0eSSascha Wildner 			 */
253*99db7d0eSSascha Wildner 			break;
254*99db7d0eSSascha Wildner 		}
255*99db7d0eSSascha Wildner 	}
256*99db7d0eSSascha Wildner }
257*99db7d0eSSascha Wildner 
258*99db7d0eSSascha Wildner /*
259*99db7d0eSSascha Wildner  * When a paragraph is tagged and starts with text,
260*99db7d0eSSascha Wildner  * move the permalink to the first few words.
261*99db7d0eSSascha Wildner  */
262*99db7d0eSSascha Wildner static void
tag_move_href(struct roff_man * man,struct roff_node * n,const char * tag)263*99db7d0eSSascha Wildner tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
264*99db7d0eSSascha Wildner {
265*99db7d0eSSascha Wildner 	char	*cp;
266*99db7d0eSSascha Wildner 
267*99db7d0eSSascha Wildner 	if (n == NULL || n->type != ROFFT_TEXT ||
268*99db7d0eSSascha Wildner 	    *n->string == '\0' || *n->string == ' ')
269*99db7d0eSSascha Wildner 		return;
270*99db7d0eSSascha Wildner 
271*99db7d0eSSascha Wildner 	cp = n->string;
272*99db7d0eSSascha Wildner 	while (cp != NULL && cp - n->string < 5)
273*99db7d0eSSascha Wildner 		cp = strchr(cp + 1, ' ');
274*99db7d0eSSascha Wildner 
275*99db7d0eSSascha Wildner 	/* If the first text node is longer, split it. */
276*99db7d0eSSascha Wildner 
277*99db7d0eSSascha Wildner 	if (cp != NULL && cp[1] != '\0') {
278*99db7d0eSSascha Wildner 		man->last = n;
279*99db7d0eSSascha Wildner 		man->next = ROFF_NEXT_SIBLING;
280*99db7d0eSSascha Wildner 		roff_word_alloc(man, n->line,
281*99db7d0eSSascha Wildner 		    n->pos + (cp - n->string), cp + 1);
282*99db7d0eSSascha Wildner 		man->last->flags = n->flags & ~NODE_LINE;
283*99db7d0eSSascha Wildner 		*cp = '\0';
284*99db7d0eSSascha Wildner 	}
285*99db7d0eSSascha Wildner 
286*99db7d0eSSascha Wildner 	assert(n->tag == NULL);
287*99db7d0eSSascha Wildner 	n->tag = mandoc_strdup(tag);
288*99db7d0eSSascha Wildner 	n->flags |= NODE_HREF;
289*99db7d0eSSascha Wildner }
290*99db7d0eSSascha Wildner 
291*99db7d0eSSascha Wildner /*
292*99db7d0eSSascha Wildner  * When all tags have been set, decide where to put
293*99db7d0eSSascha Wildner  * the associated permalinks, and maybe move some tags
294*99db7d0eSSascha Wildner  * to the beginning of the respective paragraphs.
295*99db7d0eSSascha Wildner  */
296*99db7d0eSSascha Wildner void
tag_postprocess(struct roff_man * man,struct roff_node * n)297*99db7d0eSSascha Wildner tag_postprocess(struct roff_man *man, struct roff_node *n)
298*99db7d0eSSascha Wildner {
299*99db7d0eSSascha Wildner 	if (n->flags & NODE_ID) {
300*99db7d0eSSascha Wildner 		switch (n->tok) {
301*99db7d0eSSascha Wildner 		case MDOC_Pp:
302*99db7d0eSSascha Wildner 			tag_move_href(man, n->next, n->tag);
303*99db7d0eSSascha Wildner 			break;
304*99db7d0eSSascha Wildner 		case MDOC_Bd:
305*99db7d0eSSascha Wildner 		case MDOC_D1:
306*99db7d0eSSascha Wildner 		case MDOC_Dl:
307*99db7d0eSSascha Wildner 			tag_move_href(man, n->child, n->tag);
308*99db7d0eSSascha Wildner 			break;
309*99db7d0eSSascha Wildner 		case MDOC_Bl:
310*99db7d0eSSascha Wildner 			/* XXX No permalink for now. */
311*99db7d0eSSascha Wildner 			break;
312*99db7d0eSSascha Wildner 		default:
313*99db7d0eSSascha Wildner 			if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
314*99db7d0eSSascha Wildner 				tag_move_id(n);
315*99db7d0eSSascha Wildner 			if (n->tok != MDOC_Tg)
316*99db7d0eSSascha Wildner 				n->flags |= NODE_HREF;
317*99db7d0eSSascha Wildner 			else if ((n->flags & NODE_ID) == 0) {
318*99db7d0eSSascha Wildner 				n->flags |= NODE_NOPRT;
319*99db7d0eSSascha Wildner 				free(n->tag);
320*99db7d0eSSascha Wildner 				n->tag = NULL;
321*99db7d0eSSascha Wildner 			}
322*99db7d0eSSascha Wildner 			break;
323*99db7d0eSSascha Wildner 		}
324*99db7d0eSSascha Wildner 	}
325*99db7d0eSSascha Wildner 	for (n = n->child; n != NULL; n = n->next)
326*99db7d0eSSascha Wildner 		tag_postprocess(man, n);
32754ba9607SSascha Wildner }
328