xref: /openbsd-src/usr.bin/mandoc/tag.c (revision 1ad61ae0a79a724d2d3ec69e69c8e1d1ff6b53a0)
1 /* $OpenBSD: tag.c,v 1.37 2022/04/26 11:28:35 schwarze Exp $ */
2 /*
3  * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022
4  *               Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Functions to tag syntax tree nodes.
19  * For internal use by mandoc(1) validation modules only.
20  */
21 #include <sys/types.h>
22 
23 #include <assert.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 
30 #include "mandoc_aux.h"
31 #include "mandoc_ohash.h"
32 #include "roff.h"
33 #include "mdoc.h"
34 #include "roff_int.h"
35 #include "tag.h"
36 
/*
 * One record of the tag hash table: a tag name together with
 * the syntax tree nodes that have been registered for it.
 */
struct tag_entry {
	struct roff_node	**nodes;	/* registered nodes */
	size_t			  maxnodes;	/* slots allocated in nodes */
	size_t			  nnodes;	/* slots used in nodes */
	int			  prio;		/* priority of this entry */
	char			  s[];		/* NUL-terminated tag name; */
						/* its offset is the hash key */
};
44 
45 static void		 tag_move_href(struct roff_man *,
46 				struct roff_node *, const char *);
47 static void		 tag_move_id(struct roff_node *);
48 
49 static struct ohash	 tag_data;
50 
51 
52 /*
53  * Set up the ohash table to collect nodes
54  * where various marked-up terms are documented.
55  */
56 void
57 tag_alloc(void)
58 {
59 	mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s));
60 }
61 
62 void
63 tag_free(void)
64 {
65 	struct tag_entry	*entry;
66 	unsigned int		 slot;
67 
68 	if (tag_data.info.free == NULL)
69 		return;
70 	entry = ohash_first(&tag_data, &slot);
71 	while (entry != NULL) {
72 		free(entry->nodes);
73 		free(entry);
74 		entry = ohash_next(&tag_data, &slot);
75 	}
76 	ohash_delete(&tag_data);
77 	tag_data.info.free = NULL;
78 }
79 
80 /*
81  * Set a node where a term is defined,
82  * unless the term is already defined at a lower priority.
83  */
84 void
85 tag_put(const char *s, int prio, struct roff_node *n)
86 {
87 	struct tag_entry	*entry;
88 	struct roff_node	*nold;
89 	const char		*se;
90 	size_t			 len;
91 	unsigned int		 slot;
92 
93 	assert(prio <= TAG_FALLBACK);
94 
95 	/*
96 	 * If the node is already tagged, the existing tag is
97 	 * explicit and we are now about to add an implicit tag.
98 	 * Don't do that; just skip implicit tagging if the author
99 	 * specified an explicit tag.
100 	 */
101 
102 	if (n->flags & NODE_ID)
103 		return;
104 
105 	/* Determine the implicit tag. */
106 
107 	if (s == NULL) {
108 		if (n->child == NULL || n->child->type != ROFFT_TEXT)
109 			return;
110 		s = n->child->string;
111 		switch (s[0]) {
112 		case '-':
113 			s++;
114 			break;
115 		case '\\':
116 			switch (s[1]) {
117 			case '&':
118 			case '-':
119 			case 'e':
120 				s += 2;
121 				break;
122 			default:
123 				break;
124 			}
125 			break;
126 		default:
127 			break;
128 		}
129 	}
130 
131 	/*
132 	 * Skip whitespace and escapes and whatever follows,
133 	 * and if there is any, downgrade the priority.
134 	 */
135 
136 	len = strcspn(s, " \t\\");
137 	if (len == 0)
138 		return;
139 
140 	se = s + len;
141 	if (*se != '\0' && prio < TAG_WEAK)
142 		prio = TAG_WEAK;
143 
144 	slot = ohash_qlookupi(&tag_data, s, &se);
145 	entry = ohash_find(&tag_data, slot);
146 
147 	/* Build a new entry. */
148 
149 	if (entry == NULL) {
150 		entry = mandoc_malloc(sizeof(*entry) + len + 1);
151 		memcpy(entry->s, s, len);
152 		entry->s[len] = '\0';
153 		entry->nodes = NULL;
154 		entry->maxnodes = entry->nnodes = 0;
155 		ohash_insert(&tag_data, slot, entry);
156 	}
157 
158 	/*
159 	 * Lower priority numbers take precedence.
160 	 * If a better entry is already present, ignore the new one.
161 	 */
162 
163 	else if (entry->prio < prio)
164 		return;
165 
166 	/*
167 	 * If the existing entry is worse, clear it.
168 	 * In addition, a tag with priority TAG_FALLBACK
169 	 * is only used if the tag occurs exactly once.
170 	 */
171 
172 	else if (entry->prio > prio || prio == TAG_FALLBACK) {
173 		while (entry->nnodes > 0) {
174 			nold = entry->nodes[--entry->nnodes];
175 			nold->flags &= ~NODE_ID;
176 			free(nold->tag);
177 			nold->tag = NULL;
178 		}
179 		if (prio == TAG_FALLBACK) {
180 			entry->prio = TAG_DELETE;
181 			return;
182 		}
183 	}
184 
185 	/* Remember the new node. */
186 
187 	if (entry->maxnodes == entry->nnodes) {
188 		entry->maxnodes += 4;
189 		entry->nodes = mandoc_reallocarray(entry->nodes,
190 		    entry->maxnodes, sizeof(*entry->nodes));
191 	}
192 	entry->nodes[entry->nnodes++] = n;
193 	entry->prio = prio;
194 	n->flags |= NODE_ID;
195 	if (n->child == NULL || n->child->string != s || *se != '\0') {
196 		assert(n->tag == NULL);
197 		n->tag = mandoc_strndup(s, len);
198 	}
199 }
200 
201 int
202 tag_exists(const char *tag)
203 {
204 	return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL;
205 }
206 
207 /*
208  * For in-line elements, move the link target
209  * to the enclosing paragraph when appropriate.
210  */
211 static void
212 tag_move_id(struct roff_node *n)
213 {
214 	struct roff_node *np;
215 
216 	np = n;
217 	for (;;) {
218 		if (np->prev != NULL)
219 			np = np->prev;
220 		else if ((np = np->parent) == NULL)
221 			return;
222 		switch (np->tok) {
223 		case MDOC_It:
224 			switch (np->parent->parent->norm->Bl.type) {
225 			case LIST_column:
226 				/* Target the ROFFT_BLOCK = <tr>. */
227 				np = np->parent;
228 				break;
229 			case LIST_diag:
230 			case LIST_hang:
231 			case LIST_inset:
232 			case LIST_ohang:
233 			case LIST_tag:
234 				/* Target the ROFFT_HEAD = <dt>. */
235 				np = np->parent->head;
236 				break;
237 			default:
238 				/* Target the ROFF_BODY = <li>. */
239 				break;
240 			}
241 			/* FALLTHROUGH */
242 		case MDOC_Pp:	/* Target the ROFFT_ELEM = <p>. */
243 			if (np->tag == NULL) {
244 				np->tag = mandoc_strdup(n->tag == NULL ?
245 				    n->child->string : n->tag);
246 				np->flags |= NODE_ID;
247 				n->flags &= ~NODE_ID;
248 			}
249 			return;
250 		case MDOC_Sh:
251 		case MDOC_Ss:
252 		case MDOC_Bd:
253 		case MDOC_Bl:
254 		case MDOC_D1:
255 		case MDOC_Dl:
256 		case MDOC_Rs:
257 			/* Do not move past major blocks. */
258 			return;
259 		default:
260 			/*
261 			 * Move past in-line content and partial
262 			 * blocks, for example .It Xo or .It Bq Er.
263 			 */
264 			break;
265 		}
266 	}
267 }
268 
269 /*
270  * When a paragraph is tagged and starts with text,
271  * move the permalink to the first few words.
272  */
273 static void
274 tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
275 {
276 	char	*cp;
277 
278 	if (n == NULL || n->type != ROFFT_TEXT ||
279 	    *n->string == '\0' || *n->string == ' ')
280 		return;
281 
282 	cp = n->string;
283 	while (cp != NULL && cp - n->string < 5)
284 		cp = strchr(cp + 1, ' ');
285 
286 	/* If the first text node is longer, split it. */
287 
288 	if (cp != NULL && cp[1] != '\0') {
289 		man->last = n;
290 		man->next = ROFF_NEXT_SIBLING;
291 		roff_word_alloc(man, n->line,
292 		    n->pos + (cp - n->string), cp + 1);
293 		man->last->flags = n->flags & ~NODE_LINE;
294 		*cp = '\0';
295 	}
296 
297 	assert(n->tag == NULL);
298 	n->tag = mandoc_strdup(tag);
299 	n->flags |= NODE_HREF;
300 }
301 
302 /*
303  * When all tags have been set, decide where to put
304  * the associated permalinks, and maybe move some tags
305  * to the beginning of the respective paragraphs.
306  */
307 void
308 tag_postprocess(struct roff_man *man, struct roff_node *n)
309 {
310 	if (n->flags & NODE_ID) {
311 		switch (n->tok) {
312 		case MDOC_Pp:
313 			tag_move_href(man, n->next, n->tag);
314 			break;
315 		case MDOC_Bd:
316 		case MDOC_D1:
317 		case MDOC_Dl:
318 			tag_move_href(man, n->child, n->tag);
319 			break;
320 		case MDOC_Bl:
321 			/* XXX No permalink for now. */
322 			break;
323 		default:
324 			if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
325 				tag_move_id(n);
326 			if (n->tok != MDOC_Tg)
327 				n->flags |= NODE_HREF;
328 			else if ((n->flags & NODE_ID) == 0) {
329 				n->flags |= NODE_NOPRT;
330 				free(n->tag);
331 				n->tag = NULL;
332 			}
333 			break;
334 		}
335 	}
336 	for (n = n->child; n != NULL; n = n->next)
337 		tag_postprocess(man, n);
338 }
339