1 /* $OpenBSD: tag.c,v 1.37 2022/04/26 11:28:35 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022 4 * Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * Functions to tag syntax tree nodes. 19 * For internal use by mandoc(1) validation modules only. 20 */ 21 #include <sys/types.h> 22 23 #include <assert.h> 24 #include <limits.h> 25 #include <stddef.h> 26 #include <stdint.h> 27 #include <stdlib.h> 28 #include <string.h> 29 30 #include "mandoc_aux.h" 31 #include "mandoc_ohash.h" 32 #include "roff.h" 33 #include "mdoc.h" 34 #include "roff_int.h" 35 #include "tag.h" 36 37 struct tag_entry { 38 struct roff_node **nodes; 39 size_t maxnodes; 40 size_t nnodes; 41 int prio; 42 char s[]; 43 }; 44 45 static void tag_move_href(struct roff_man *, 46 struct roff_node *, const char *); 47 static void tag_move_id(struct roff_node *); 48 49 static struct ohash tag_data; 50 51 52 /* 53 * Set up the ohash table to collect nodes 54 * where various marked-up terms are documented. 55 */ 56 void 57 tag_alloc(void) 58 { 59 mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s)); 60 } 61 62 void 63 tag_free(void) 64 { 65 struct tag_entry *entry; 66 unsigned int slot; 67 68 if (tag_data.info.free == NULL) 69 return; 70 entry = ohash_first(&tag_data, &slot); 71 while (entry != NULL) { 72 free(entry->nodes); 73 free(entry); 74 entry = ohash_next(&tag_data, &slot); 75 } 76 ohash_delete(&tag_data); 77 tag_data.info.free = NULL; 78 } 79 80 /* 81 * Set a node where a term is defined, 82 * unless the term is already defined at a lower priority. 83 */ 84 void 85 tag_put(const char *s, int prio, struct roff_node *n) 86 { 87 struct tag_entry *entry; 88 struct roff_node *nold; 89 const char *se; 90 size_t len; 91 unsigned int slot; 92 93 assert(prio <= TAG_FALLBACK); 94 95 /* 96 * If the node is already tagged, the existing tag is 97 * explicit and we are now about to add an implicit tag. 98 * Don't do that; just skip implicit tagging if the author 99 * specified an explicit tag. 100 */ 101 102 if (n->flags & NODE_ID) 103 return; 104 105 /* Determine the implicit tag. */ 106 107 if (s == NULL) { 108 if (n->child == NULL || n->child->type != ROFFT_TEXT) 109 return; 110 s = n->child->string; 111 switch (s[0]) { 112 case '-': 113 s++; 114 break; 115 case '\\': 116 switch (s[1]) { 117 case '&': 118 case '-': 119 case 'e': 120 s += 2; 121 break; 122 default: 123 break; 124 } 125 break; 126 default: 127 break; 128 } 129 } 130 131 /* 132 * Skip whitespace and escapes and whatever follows, 133 * and if there is any, downgrade the priority. 134 */ 135 136 len = strcspn(s, " \t\\"); 137 if (len == 0) 138 return; 139 140 se = s + len; 141 if (*se != '\0' && prio < TAG_WEAK) 142 prio = TAG_WEAK; 143 144 slot = ohash_qlookupi(&tag_data, s, &se); 145 entry = ohash_find(&tag_data, slot); 146 147 /* Build a new entry. */ 148 149 if (entry == NULL) { 150 entry = mandoc_malloc(sizeof(*entry) + len + 1); 151 memcpy(entry->s, s, len); 152 entry->s[len] = '\0'; 153 entry->nodes = NULL; 154 entry->maxnodes = entry->nnodes = 0; 155 ohash_insert(&tag_data, slot, entry); 156 } 157 158 /* 159 * Lower priority numbers take precedence. 160 * If a better entry is already present, ignore the new one. 161 */ 162 163 else if (entry->prio < prio) 164 return; 165 166 /* 167 * If the existing entry is worse, clear it. 168 * In addition, a tag with priority TAG_FALLBACK 169 * is only used if the tag occurs exactly once. 170 */ 171 172 else if (entry->prio > prio || prio == TAG_FALLBACK) { 173 while (entry->nnodes > 0) { 174 nold = entry->nodes[--entry->nnodes]; 175 nold->flags &= ~NODE_ID; 176 free(nold->tag); 177 nold->tag = NULL; 178 } 179 if (prio == TAG_FALLBACK) { 180 entry->prio = TAG_DELETE; 181 return; 182 } 183 } 184 185 /* Remember the new node. */ 186 187 if (entry->maxnodes == entry->nnodes) { 188 entry->maxnodes += 4; 189 entry->nodes = mandoc_reallocarray(entry->nodes, 190 entry->maxnodes, sizeof(*entry->nodes)); 191 } 192 entry->nodes[entry->nnodes++] = n; 193 entry->prio = prio; 194 n->flags |= NODE_ID; 195 if (n->child == NULL || n->child->string != s || *se != '\0') { 196 assert(n->tag == NULL); 197 n->tag = mandoc_strndup(s, len); 198 } 199 } 200 201 int 202 tag_exists(const char *tag) 203 { 204 return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL; 205 } 206 207 /* 208 * For in-line elements, move the link target 209 * to the enclosing paragraph when appropriate. 210 */ 211 static void 212 tag_move_id(struct roff_node *n) 213 { 214 struct roff_node *np; 215 216 np = n; 217 for (;;) { 218 if (np->prev != NULL) 219 np = np->prev; 220 else if ((np = np->parent) == NULL) 221 return; 222 switch (np->tok) { 223 case MDOC_It: 224 switch (np->parent->parent->norm->Bl.type) { 225 case LIST_column: 226 /* Target the ROFFT_BLOCK = <tr>. */ 227 np = np->parent; 228 break; 229 case LIST_diag: 230 case LIST_hang: 231 case LIST_inset: 232 case LIST_ohang: 233 case LIST_tag: 234 /* Target the ROFFT_HEAD = <dt>. */ 235 np = np->parent->head; 236 break; 237 default: 238 /* Target the ROFF_BODY = <li>. */ 239 break; 240 } 241 /* FALLTHROUGH */ 242 case MDOC_Pp: /* Target the ROFFT_ELEM = <p>. */ 243 if (np->tag == NULL) { 244 np->tag = mandoc_strdup(n->tag == NULL ? 245 n->child->string : n->tag); 246 np->flags |= NODE_ID; 247 n->flags &= ~NODE_ID; 248 } 249 return; 250 case MDOC_Sh: 251 case MDOC_Ss: 252 case MDOC_Bd: 253 case MDOC_Bl: 254 case MDOC_D1: 255 case MDOC_Dl: 256 case MDOC_Rs: 257 /* Do not move past major blocks. */ 258 return; 259 default: 260 /* 261 * Move past in-line content and partial 262 * blocks, for example .It Xo or .It Bq Er. 263 */ 264 break; 265 } 266 } 267 } 268 269 /* 270 * When a paragraph is tagged and starts with text, 271 * move the permalink to the first few words. 272 */ 273 static void 274 tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag) 275 { 276 char *cp; 277 278 if (n == NULL || n->type != ROFFT_TEXT || 279 *n->string == '\0' || *n->string == ' ') 280 return; 281 282 cp = n->string; 283 while (cp != NULL && cp - n->string < 5) 284 cp = strchr(cp + 1, ' '); 285 286 /* If the first text node is longer, split it. */ 287 288 if (cp != NULL && cp[1] != '\0') { 289 man->last = n; 290 man->next = ROFF_NEXT_SIBLING; 291 roff_word_alloc(man, n->line, 292 n->pos + (cp - n->string), cp + 1); 293 man->last->flags = n->flags & ~NODE_LINE; 294 *cp = '\0'; 295 } 296 297 assert(n->tag == NULL); 298 n->tag = mandoc_strdup(tag); 299 n->flags |= NODE_HREF; 300 } 301 302 /* 303 * When all tags have been set, decide where to put 304 * the associated permalinks, and maybe move some tags 305 * to the beginning of the respective paragraphs. 306 */ 307 void 308 tag_postprocess(struct roff_man *man, struct roff_node *n) 309 { 310 if (n->flags & NODE_ID) { 311 switch (n->tok) { 312 case MDOC_Pp: 313 tag_move_href(man, n->next, n->tag); 314 break; 315 case MDOC_Bd: 316 case MDOC_D1: 317 case MDOC_Dl: 318 tag_move_href(man, n->child, n->tag); 319 break; 320 case MDOC_Bl: 321 /* XXX No permalink for now. */ 322 break; 323 default: 324 if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo) 325 tag_move_id(n); 326 if (n->tok != MDOC_Tg) 327 n->flags |= NODE_HREF; 328 else if ((n->flags & NODE_ID) == 0) { 329 n->flags |= NODE_NOPRT; 330 free(n->tag); 331 n->tag = NULL; 332 } 333 break; 334 } 335 } 336 for (n = n->child; n != NULL; n = n->next) 337 tag_postprocess(man, n); 338 } 339