xref: /netbsd-src/external/bsd/mdocml/dist/chars.c (revision c2f76ff004a2cb67efe5b12d97bd3ef7fe89e18d)
1 /*	$Vendor-Id: chars.c,v 1.31 2011/01/02 10:10:57 kristaps Exp $ */
2 /*
3  * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20 
21 #include <assert.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 
26 #include "mandoc.h"
27 #include "chars.h"
28 
29 #define	PRINT_HI	 126
30 #define	PRINT_LO	 32
31 
/*
 * Flag bits for the "type" member of struct ln.  They select which
 * lookups may return an entry: the chars_spec2*() functions search with
 * CHARS_CHAR, the chars_res2*() functions search with CHARS_STRING.
 */
#define	CHARS_CHAR	 (1 << 0)
#define	CHARS_STRING	 (1 << 1)
#define CHARS_BOTH	 (CHARS_CHAR | CHARS_STRING)

/*
 * One entry of the character table.
 *
 * The member order must not change: the CHAR(), STRING() and BOTH()
 * initialiser macros fill the members positionally.
 */
struct	ln {
	struct ln	 *next;		/* next entry in the same hash chain */
	const char	 *code;		/* NUL-terminated name that is looked up */
	const char	 *ascii;	/* string handed back by the *2str() calls */
	int		  unicode;	/* value handed back by the *2cp() calls */
	int		  type;		/* bitmask of CHARS_CHAR / CHARS_STRING */
};
42 
/*
 * Number of elements in the static "lines" array declared through
 * CHAR_TBL_START.  chars_init() dereferences the "code" member of every
 * one of these elements, so the count has to agree with the number of
 * entries supplied by chars.in.
 */
#define	LINES_MAX	  351

/*
 * Initialisers for a single struct ln.  The arguments are stored
 * positionally: "in" becomes the code member, "ch" the ascii member and
 * "code" the unicode member; the chain pointer starts out as NULL.
 * The three variants differ only in the type flags they set.
 */
#define CHAR(in, ch, code) \
	{ NULL, (in), (ch), (code), CHARS_CHAR },
#define STRING(in, ch, code) \
	{ NULL, (in), (ch), (code), CHARS_STRING },
#define BOTH(in, ch, code) \
	{ NULL, (in), (ch), (code), CHARS_BOTH },

/* Opening and closing of the definition of the static "lines" array. */
#define	CHAR_TBL_START	  static struct ln lines[LINES_MAX] = {
#define	CHAR_TBL_END	  };

/*
 * NOTE(review): chars.in is not visible here; presumably it expands
 * CHAR_TBL_START, a sequence of CHAR/STRING/BOTH entries, and
 * CHAR_TBL_END to define "lines" -- confirm against that file.
 */
#include "chars.in"
56 
/*
 * A character table as handed out (through a void pointer) by
 * chars_init() and released by chars_free().
 */
struct	ctab {
	enum chars	  type;		/* value passed to chars_init(); only stored here */
	struct ln	**htab;		/* bucket array, indexed by first character minus PRINT_LO */
};
61 
/* Forward declarations of the file-local lookup helpers. */
static	const struct ln	 *find(struct ctab *tab, const char *p,
				size_t sz, int type);
static	inline int	  match(const struct ln *ln, const char *p,
				size_t sz, int type);
65 
66 
67 void
68 chars_free(void *arg)
69 {
70 	struct ctab	*tab;
71 
72 	tab = (struct ctab *)arg;
73 
74 	free(tab->htab);
75 	free(tab);
76 }
77 
78 
79 void *
80 chars_init(enum chars type)
81 {
82 	struct ctab	 *tab;
83 	struct ln	**htab;
84 	struct ln	 *pp;
85 	int		  i, hash;
86 
87 	/*
88 	 * Constructs a very basic chaining hashtable.  The hash routine
89 	 * is simply the integral value of the first character.
90 	 * Subsequent entries are chained in the order they're processed
91 	 * (they're in-line re-ordered during lookup).
92 	 */
93 
94 	tab = malloc(sizeof(struct ctab));
95 	if (NULL == tab) {
96 		perror(NULL);
97 		exit((int)MANDOCLEVEL_SYSERR);
98 	}
99 
100 	htab = calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **));
101 	if (NULL == htab) {
102 		perror(NULL);
103 		exit((int)MANDOCLEVEL_SYSERR);
104 	}
105 
106 	for (i = 0; i < LINES_MAX; i++) {
107 		hash = (int)lines[i].code[0] - PRINT_LO;
108 
109 		if (NULL == (pp = htab[hash])) {
110 			htab[hash] = &lines[i];
111 			continue;
112 		}
113 
114 		for ( ; pp->next; pp = pp->next)
115 			/* Scan ahead. */ ;
116 		pp->next = &lines[i];
117 	}
118 
119 	tab->htab = htab;
120 	tab->type = type;
121 	return(tab);
122 }
123 
124 
125 /*
126  * Special character to Unicode codepoint.
127  */
128 int
129 chars_spec2cp(void *arg, const char *p, size_t sz)
130 {
131 	const struct ln	*ln;
132 
133 	ln = find((struct ctab *)arg, p, sz, CHARS_CHAR);
134 	if (NULL == ln)
135 		return(-1);
136 	return(ln->unicode);
137 }
138 
139 
140 /*
141  * Reserved word to Unicode codepoint.
142  */
143 int
144 chars_res2cp(void *arg, const char *p, size_t sz)
145 {
146 	const struct ln	*ln;
147 
148 	ln = find((struct ctab *)arg, p, sz, CHARS_STRING);
149 	if (NULL == ln)
150 		return(-1);
151 	return(ln->unicode);
152 }
153 
154 
155 /*
156  * Special character to string array.
157  */
158 const char *
159 chars_spec2str(void *arg, const char *p, size_t sz, size_t *rsz)
160 {
161 	const struct ln	*ln;
162 
163 	ln = find((struct ctab *)arg, p, sz, CHARS_CHAR);
164 	if (NULL == ln)
165 		return(NULL);
166 
167 	*rsz = strlen(ln->ascii);
168 	return(ln->ascii);
169 }
170 
171 
172 /*
173  * Reserved word to string array.
174  */
175 const char *
176 chars_res2str(void *arg, const char *p, size_t sz, size_t *rsz)
177 {
178 	const struct ln	*ln;
179 
180 	ln = find((struct ctab *)arg, p, sz, CHARS_STRING);
181 	if (NULL == ln)
182 		return(NULL);
183 
184 	*rsz = strlen(ln->ascii);
185 	return(ln->ascii);
186 }
187 
188 
189 static const struct ln *
190 find(struct ctab *tab, const char *p, size_t sz, int type)
191 {
192 	struct ln	 *pp, *prev;
193 	struct ln	**htab;
194 	int		  hash;
195 
196 	assert(p);
197 	if (0 == sz)
198 		return(NULL);
199 
200 	if (p[0] < PRINT_LO || p[0] > PRINT_HI)
201 		return(NULL);
202 
203 	/*
204 	 * Lookup the symbol in the symbol hash.  See ascii2htab for the
205 	 * hashtable specs.  This dynamically re-orders the hash chain
206 	 * to optimise for repeat hits.
207 	 */
208 
209 	hash = (int)p[0] - PRINT_LO;
210 	htab = tab->htab;
211 
212 	if (NULL == (pp = htab[hash]))
213 		return(NULL);
214 
215 	for (prev = NULL; pp; pp = pp->next) {
216 		if ( ! match(pp, p, sz, type)) {
217 			prev = pp;
218 			continue;
219 		}
220 
221 		if (prev) {
222 			prev->next = pp->next;
223 			pp->next = htab[hash];
224 			htab[hash] = pp;
225 		}
226 
227 		return(pp);
228 	}
229 
230 	return(NULL);
231 }
232 
233 
234 static inline int
235 match(const struct ln *ln, const char *p, size_t sz, int type)
236 {
237 
238 	if ( ! (ln->type & type))
239 		return(0);
240 	if (strncmp(ln->code, p, sz))
241 		return(0);
242 	return('\0' == ln->code[(int)sz]);
243 }
244