1 /* $Vendor-Id: chars.c,v 1.34 2011/03/22 10:13:01 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <assert.h> 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <string.h> 26 27 #include "mandoc.h" 28 #include "out.h" 29 30 #define PRINT_HI 126 31 #define PRINT_LO 32 32 33 struct ln { 34 struct ln *next; 35 const char *code; 36 const char *ascii; 37 int unicode; 38 int type; 39 #define CHARS_CHAR (1 << 0) 40 #define CHARS_STRING (1 << 1) 41 #define CHARS_BOTH (CHARS_CHAR | CHARS_STRING) 42 }; 43 44 #define LINES_MAX 351 45 46 #define CHAR(in, ch, code) \ 47 { NULL, (in), (ch), (code), CHARS_CHAR }, 48 #define STRING(in, ch, code) \ 49 { NULL, (in), (ch), (code), CHARS_STRING }, 50 #define BOTH(in, ch, code) \ 51 { NULL, (in), (ch), (code), CHARS_BOTH }, 52 53 #define CHAR_TBL_START static struct ln lines[LINES_MAX] = { 54 #define CHAR_TBL_END }; 55 56 #include "chars.in" 57 58 struct ctab { 59 enum chars type; 60 struct ln **htab; 61 }; 62 63 static inline int match(const struct ln *, 64 const char *, size_t, int); 65 static const struct ln *find(struct ctab *, const char *, size_t, int); 66 67 68 void 69 chars_free(void *arg) 70 { 71 struct ctab *tab; 72 73 tab = (struct ctab *)arg; 74 75 free(tab->htab); 76 free(tab); 77 } 78 79 80 void * 81 chars_init(enum chars type) 82 { 83 struct ctab *tab; 84 struct ln **htab; 85 struct ln *pp; 86 int i, hash; 87 88 /* 89 * Constructs a very basic chaining hashtable. The hash routine 90 * is simply the integral value of the first character. 91 * Subsequent entries are chained in the order they're processed 92 * (they're in-line re-ordered during lookup). 93 */ 94 95 tab = mandoc_malloc(sizeof(struct ctab)); 96 htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **)); 97 98 for (i = 0; i < LINES_MAX; i++) { 99 hash = (int)lines[i].code[0] - PRINT_LO; 100 101 if (NULL == (pp = htab[hash])) { 102 htab[hash] = &lines[i]; 103 continue; 104 } 105 106 for ( ; pp->next; pp = pp->next) 107 /* Scan ahead. */ ; 108 pp->next = &lines[i]; 109 } 110 111 tab->htab = htab; 112 tab->type = type; 113 return(tab); 114 } 115 116 117 /* 118 * Special character to Unicode codepoint. 119 */ 120 int 121 chars_spec2cp(void *arg, const char *p, size_t sz) 122 { 123 const struct ln *ln; 124 125 ln = find((struct ctab *)arg, p, sz, CHARS_CHAR); 126 if (NULL == ln) 127 return(-1); 128 return(ln->unicode); 129 } 130 131 132 /* 133 * Reserved word to Unicode codepoint. 134 */ 135 int 136 chars_res2cp(void *arg, const char *p, size_t sz) 137 { 138 const struct ln *ln; 139 140 ln = find((struct ctab *)arg, p, sz, CHARS_STRING); 141 if (NULL == ln) 142 return(-1); 143 return(ln->unicode); 144 } 145 146 147 /* 148 * Numbered character to literal character, 149 * represented as a null-terminated string for additional safety. 150 */ 151 const char * 152 chars_num2char(const char *p, size_t sz) 153 { 154 int i; 155 static char c[2]; 156 157 if (sz > 3) 158 return(NULL); 159 i = atoi(p); 160 if (i < 0 || i > 255) 161 return(NULL); 162 c[0] = (char)i; 163 c[1] = '\0'; 164 return(c); 165 } 166 167 168 /* 169 * Special character to string array. 170 */ 171 const char * 172 chars_spec2str(void *arg, const char *p, size_t sz, size_t *rsz) 173 { 174 const struct ln *ln; 175 176 ln = find((struct ctab *)arg, p, sz, CHARS_CHAR); 177 if (NULL == ln) 178 return(NULL); 179 180 *rsz = strlen(ln->ascii); 181 return(ln->ascii); 182 } 183 184 185 /* 186 * Reserved word to string array. 187 */ 188 const char * 189 chars_res2str(void *arg, const char *p, size_t sz, size_t *rsz) 190 { 191 const struct ln *ln; 192 193 ln = find((struct ctab *)arg, p, sz, CHARS_STRING); 194 if (NULL == ln) 195 return(NULL); 196 197 *rsz = strlen(ln->ascii); 198 return(ln->ascii); 199 } 200 201 202 static const struct ln * 203 find(struct ctab *tab, const char *p, size_t sz, int type) 204 { 205 struct ln *pp, *prev; 206 struct ln **htab; 207 int hash; 208 209 assert(p); 210 if (0 == sz) 211 return(NULL); 212 213 if (p[0] < PRINT_LO || p[0] > PRINT_HI) 214 return(NULL); 215 216 /* 217 * Lookup the symbol in the symbol hash. See ascii2htab for the 218 * hashtable specs. This dynamically re-orders the hash chain 219 * to optimise for repeat hits. 220 */ 221 222 hash = (int)p[0] - PRINT_LO; 223 htab = tab->htab; 224 225 if (NULL == (pp = htab[hash])) 226 return(NULL); 227 228 for (prev = NULL; pp; pp = pp->next) { 229 if ( ! match(pp, p, sz, type)) { 230 prev = pp; 231 continue; 232 } 233 234 if (prev) { 235 prev->next = pp->next; 236 pp->next = htab[hash]; 237 htab[hash] = pp; 238 } 239 240 return(pp); 241 } 242 243 return(NULL); 244 } 245 246 247 static inline int 248 match(const struct ln *ln, const char *p, size_t sz, int type) 249 { 250 251 if ( ! (ln->type & type)) 252 return(0); 253 if (strncmp(ln->code, p, sz)) 254 return(0); 255 return('\0' == ln->code[(int)sz]); 256 } 257