1 /* $Vendor-Id: chars.c,v 1.31 2011/01/02 10:10:57 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #ifdef HAVE_CONFIG_H 18 #include "config.h" 19 #endif 20 21 #include <assert.h> 22 #include <stdio.h> 23 #include <stdlib.h> 24 #include <string.h> 25 26 #include "mandoc.h" 27 #include "chars.h" 28 29 #define PRINT_HI 126 30 #define PRINT_LO 32 31 32 struct ln { 33 struct ln *next; 34 const char *code; 35 const char *ascii; 36 int unicode; 37 int type; 38 #define CHARS_CHAR (1 << 0) 39 #define CHARS_STRING (1 << 1) 40 #define CHARS_BOTH (CHARS_CHAR | CHARS_STRING) 41 }; 42 43 #define LINES_MAX 351 44 45 #define CHAR(in, ch, code) \ 46 { NULL, (in), (ch), (code), CHARS_CHAR }, 47 #define STRING(in, ch, code) \ 48 { NULL, (in), (ch), (code), CHARS_STRING }, 49 #define BOTH(in, ch, code) \ 50 { NULL, (in), (ch), (code), CHARS_BOTH }, 51 52 #define CHAR_TBL_START static struct ln lines[LINES_MAX] = { 53 #define CHAR_TBL_END }; 54 55 #include "chars.in" 56 57 struct ctab { 58 enum chars type; 59 struct ln **htab; 60 }; 61 62 static inline int match(const struct ln *, 63 const char *, size_t, int); 64 static const struct ln *find(struct ctab *, const char *, size_t, int); 65 66 67 void 68 chars_free(void *arg) 69 { 70 struct ctab *tab; 71 72 tab = (struct ctab *)arg; 73 74 free(tab->htab); 75 free(tab); 76 } 77 78 79 void * 80 chars_init(enum chars type) 81 { 82 struct ctab *tab; 83 struct ln **htab; 84 struct ln *pp; 85 int i, hash; 86 87 /* 88 * Constructs a very basic chaining hashtable. The hash routine 89 * is simply the integral value of the first character. 90 * Subsequent entries are chained in the order they're processed 91 * (they're in-line re-ordered during lookup). 92 */ 93 94 tab = malloc(sizeof(struct ctab)); 95 if (NULL == tab) { 96 perror(NULL); 97 exit((int)MANDOCLEVEL_SYSERR); 98 } 99 100 htab = calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **)); 101 if (NULL == htab) { 102 perror(NULL); 103 exit((int)MANDOCLEVEL_SYSERR); 104 } 105 106 for (i = 0; i < LINES_MAX; i++) { 107 hash = (int)lines[i].code[0] - PRINT_LO; 108 109 if (NULL == (pp = htab[hash])) { 110 htab[hash] = &lines[i]; 111 continue; 112 } 113 114 for ( ; pp->next; pp = pp->next) 115 /* Scan ahead. */ ; 116 pp->next = &lines[i]; 117 } 118 119 tab->htab = htab; 120 tab->type = type; 121 return(tab); 122 } 123 124 125 /* 126 * Special character to Unicode codepoint. 127 */ 128 int 129 chars_spec2cp(void *arg, const char *p, size_t sz) 130 { 131 const struct ln *ln; 132 133 ln = find((struct ctab *)arg, p, sz, CHARS_CHAR); 134 if (NULL == ln) 135 return(-1); 136 return(ln->unicode); 137 } 138 139 140 /* 141 * Reserved word to Unicode codepoint. 142 */ 143 int 144 chars_res2cp(void *arg, const char *p, size_t sz) 145 { 146 const struct ln *ln; 147 148 ln = find((struct ctab *)arg, p, sz, CHARS_STRING); 149 if (NULL == ln) 150 return(-1); 151 return(ln->unicode); 152 } 153 154 155 /* 156 * Special character to string array. 157 */ 158 const char * 159 chars_spec2str(void *arg, const char *p, size_t sz, size_t *rsz) 160 { 161 const struct ln *ln; 162 163 ln = find((struct ctab *)arg, p, sz, CHARS_CHAR); 164 if (NULL == ln) 165 return(NULL); 166 167 *rsz = strlen(ln->ascii); 168 return(ln->ascii); 169 } 170 171 172 /* 173 * Reserved word to string array. 174 */ 175 const char * 176 chars_res2str(void *arg, const char *p, size_t sz, size_t *rsz) 177 { 178 const struct ln *ln; 179 180 ln = find((struct ctab *)arg, p, sz, CHARS_STRING); 181 if (NULL == ln) 182 return(NULL); 183 184 *rsz = strlen(ln->ascii); 185 return(ln->ascii); 186 } 187 188 189 static const struct ln * 190 find(struct ctab *tab, const char *p, size_t sz, int type) 191 { 192 struct ln *pp, *prev; 193 struct ln **htab; 194 int hash; 195 196 assert(p); 197 if (0 == sz) 198 return(NULL); 199 200 if (p[0] < PRINT_LO || p[0] > PRINT_HI) 201 return(NULL); 202 203 /* 204 * Lookup the symbol in the symbol hash. See ascii2htab for the 205 * hashtable specs. This dynamically re-orders the hash chain 206 * to optimise for repeat hits. 207 */ 208 209 hash = (int)p[0] - PRINT_LO; 210 htab = tab->htab; 211 212 if (NULL == (pp = htab[hash])) 213 return(NULL); 214 215 for (prev = NULL; pp; pp = pp->next) { 216 if ( ! match(pp, p, sz, type)) { 217 prev = pp; 218 continue; 219 } 220 221 if (prev) { 222 prev->next = pp->next; 223 pp->next = htab[hash]; 224 htab[hash] = pp; 225 } 226 227 return(pp); 228 } 229 230 return(NULL); 231 } 232 233 234 static inline int 235 match(const struct ln *ln, const char *p, size_t sz, int type) 236 { 237 238 if ( ! (ln->type & type)) 239 return(0); 240 if (strncmp(ln->code, p, sz)) 241 return(0); 242 return('\0' == ln->code[(int)sz]); 243 } 244