xref: /netbsd-src/external/bsd/mdocml/dist/chars.c (revision ca453df649ce9db45b64d73678ba06cbccf9aa11)
1 /*	$Vendor-Id: chars.c,v 1.34 2011/03/22 10:13:01 kristaps Exp $ */
2 /*
3  * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include <assert.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 
27 #include "mandoc.h"
28 #include "out.h"
29 
30 #define	PRINT_HI	 126
31 #define	PRINT_LO	 32
32 
/*
 * Bits stored in ln.type; lookups pass one of these as a mask and an
 * entry is eligible when the two share at least one bit.
 */
#define	CHARS_CHAR	 (1 << 0)
#define	CHARS_STRING	 (1 << 1)
#define	CHARS_BOTH	 (CHARS_CHAR | CHARS_STRING)

/*
 * One entry of the escape-name table.
 * The member order is significant: the table is filled in with
 * positional initialisers.
 */
struct	ln {
	struct ln	 *next;		/* next entry in the same hash chain */
	const char	 *code;		/* name that lookups are matched against */
	const char	 *ascii;	/* string handed back by the *2str lookups */
	int		  unicode;	/* value handed back by the *2cp lookups */
	int		  type;		/* CHARS_* bits this entry answers to */
};
43 
44 #define	LINES_MAX	  351
45 
46 #define CHAR(in, ch, code) \
47 	{ NULL, (in), (ch), (code), CHARS_CHAR },
48 #define STRING(in, ch, code) \
49 	{ NULL, (in), (ch), (code), CHARS_STRING },
50 #define BOTH(in, ch, code) \
51 	{ NULL, (in), (ch), (code), CHARS_BOTH },
52 
53 #define	CHAR_TBL_START	  static struct ln lines[LINES_MAX] = {
54 #define	CHAR_TBL_END	  };
55 
56 #include "chars.in"
57 
58 struct	ctab {
59 	enum chars	  type;
60 	struct ln	**htab;
61 };
62 
/* File-local helpers, defined at the bottom of the file. */
static	inline int	  match(const struct ln *ln,
				const char *p, size_t sz, int type);
static	const struct ln	 *find(struct ctab *tab,
				const char *p, size_t sz, int type);
66 
67 
68 void
69 chars_free(void *arg)
70 {
71 	struct ctab	*tab;
72 
73 	tab = (struct ctab *)arg;
74 
75 	free(tab->htab);
76 	free(tab);
77 }
78 
79 
80 void *
81 chars_init(enum chars type)
82 {
83 	struct ctab	 *tab;
84 	struct ln	**htab;
85 	struct ln	 *pp;
86 	int		  i, hash;
87 
88 	/*
89 	 * Constructs a very basic chaining hashtable.  The hash routine
90 	 * is simply the integral value of the first character.
91 	 * Subsequent entries are chained in the order they're processed
92 	 * (they're in-line re-ordered during lookup).
93 	 */
94 
95 	tab = mandoc_malloc(sizeof(struct ctab));
96 	htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **));
97 
98 	for (i = 0; i < LINES_MAX; i++) {
99 		hash = (int)lines[i].code[0] - PRINT_LO;
100 
101 		if (NULL == (pp = htab[hash])) {
102 			htab[hash] = &lines[i];
103 			continue;
104 		}
105 
106 		for ( ; pp->next; pp = pp->next)
107 			/* Scan ahead. */ ;
108 		pp->next = &lines[i];
109 	}
110 
111 	tab->htab = htab;
112 	tab->type = type;
113 	return(tab);
114 }
115 
116 
117 /*
118  * Special character to Unicode codepoint.
119  */
120 int
121 chars_spec2cp(void *arg, const char *p, size_t sz)
122 {
123 	const struct ln	*ln;
124 
125 	ln = find((struct ctab *)arg, p, sz, CHARS_CHAR);
126 	if (NULL == ln)
127 		return(-1);
128 	return(ln->unicode);
129 }
130 
131 
132 /*
133  * Reserved word to Unicode codepoint.
134  */
135 int
136 chars_res2cp(void *arg, const char *p, size_t sz)
137 {
138 	const struct ln	*ln;
139 
140 	ln = find((struct ctab *)arg, p, sz, CHARS_STRING);
141 	if (NULL == ln)
142 		return(-1);
143 	return(ln->unicode);
144 }
145 
146 
/*
 * Numbered character to literal character,
 * represented as a null-terminated string for additional safety.
 *
 * Only the first sz bytes of p are examined (p need not be
 * NUL-terminated at sz).  Returns NULL if sz exceeds three or if the
 * parsed value lies outside 0..255.  The result points at a static
 * buffer that is overwritten by the next call.
 */
const char *
chars_num2char(const char *p, size_t sz)
{
	int		  i;
	size_t		  n;
	char		  buf[4];
	static char	  c[2];

	if (sz > 3)
		return(NULL);

	/*
	 * Parse a bounded private copy: calling atoi() on p directly
	 * would keep consuming digits that lie beyond the sz bytes
	 * the caller handed us.
	 */
	for (n = 0; n < sz && '\0' != p[n]; n++)
		buf[n] = p[n];
	buf[n] = '\0';

	i = atoi(buf);
	if (i < 0 || i > 255)
		return(NULL);
	c[0] = (char)i;
	c[1] = '\0';
	return(c);
}
166 
167 
168 /*
169  * Special character to string array.
170  */
171 const char *
172 chars_spec2str(void *arg, const char *p, size_t sz, size_t *rsz)
173 {
174 	const struct ln	*ln;
175 
176 	ln = find((struct ctab *)arg, p, sz, CHARS_CHAR);
177 	if (NULL == ln)
178 		return(NULL);
179 
180 	*rsz = strlen(ln->ascii);
181 	return(ln->ascii);
182 }
183 
184 
185 /*
186  * Reserved word to string array.
187  */
188 const char *
189 chars_res2str(void *arg, const char *p, size_t sz, size_t *rsz)
190 {
191 	const struct ln	*ln;
192 
193 	ln = find((struct ctab *)arg, p, sz, CHARS_STRING);
194 	if (NULL == ln)
195 		return(NULL);
196 
197 	*rsz = strlen(ln->ascii);
198 	return(ln->ascii);
199 }
200 
201 
202 static const struct ln *
203 find(struct ctab *tab, const char *p, size_t sz, int type)
204 {
205 	struct ln	 *pp, *prev;
206 	struct ln	**htab;
207 	int		  hash;
208 
209 	assert(p);
210 	if (0 == sz)
211 		return(NULL);
212 
213 	if (p[0] < PRINT_LO || p[0] > PRINT_HI)
214 		return(NULL);
215 
216 	/*
217 	 * Lookup the symbol in the symbol hash.  See ascii2htab for the
218 	 * hashtable specs.  This dynamically re-orders the hash chain
219 	 * to optimise for repeat hits.
220 	 */
221 
222 	hash = (int)p[0] - PRINT_LO;
223 	htab = tab->htab;
224 
225 	if (NULL == (pp = htab[hash]))
226 		return(NULL);
227 
228 	for (prev = NULL; pp; pp = pp->next) {
229 		if ( ! match(pp, p, sz, type)) {
230 			prev = pp;
231 			continue;
232 		}
233 
234 		if (prev) {
235 			prev->next = pp->next;
236 			pp->next = htab[hash];
237 			htab[hash] = pp;
238 		}
239 
240 		return(pp);
241 	}
242 
243 	return(NULL);
244 }
245 
246 
247 static inline int
248 match(const struct ln *ln, const char *p, size_t sz, int type)
249 {
250 
251 	if ( ! (ln->type & type))
252 		return(0);
253 	if (strncmp(ln->code, p, sz))
254 		return(0);
255 	return('\0' == ln->code[(int)sz]);
256 }
257