xref: /minix3/external/bsd/mdocml/dist/chars.c (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
/*	Id: chars.c,v 1.54 2013/06/20 22:39:30 schwarze Exp  */
/*
 * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
18d65f6f70SBen Gras #ifdef HAVE_CONFIG_H
19d65f6f70SBen Gras #include "config.h"
20d65f6f70SBen Gras #endif
21d65f6f70SBen Gras 
22d65f6f70SBen Gras #include <assert.h>
2392395e9cSLionel Sambuc #include <ctype.h>
24d65f6f70SBen Gras #include <stdlib.h>
25d65f6f70SBen Gras #include <string.h>
26d65f6f70SBen Gras 
27d65f6f70SBen Gras #include "mandoc.h"
2892395e9cSLionel Sambuc #include "libmandoc.h"
29d65f6f70SBen Gras 
/*
 * Inclusive bounds on the first byte of an escape name.  find() rejects
 * names whose first byte lies outside this range, and mchars_alloc()
 * sizes the bucket array as PRINT_HI - PRINT_LO + 1.
 */
#define	PRINT_HI	 126
#define	PRINT_LO	 32
32d65f6f70SBen Gras 
/*
 * One row of the special-character table.  Rows also serve as the nodes
 * of the singly linked bucket chains built by mchars_alloc().
 * Field order matters: the CHAR() macro initializes rows positionally.
 */
struct	ln {
	struct ln	 *next;		/* next row in the same bucket, or NULL */
	const char	 *code;		/* escape name matched by find() */
	const char	 *ascii;	/* string handed out by mchars_spec2str() */
	int		  unicode;	/* value handed out by mchars_spec2cp() */
};
39d65f6f70SBen Gras 
40*0a6a1f1dSLionel Sambuc #define	LINES_MAX	  329
41d65f6f70SBen Gras 
42d65f6f70SBen Gras #define CHAR(in, ch, code) \
4392395e9cSLionel Sambuc 	{ NULL, (in), (ch), (code) },
44d65f6f70SBen Gras 
45d65f6f70SBen Gras #define	CHAR_TBL_START	  static struct ln lines[LINES_MAX] = {
46d65f6f70SBen Gras #define	CHAR_TBL_END	  };
47d65f6f70SBen Gras 
48d65f6f70SBen Gras #include "chars.in"
49d65f6f70SBen Gras 
/* Lookup handle created by mchars_alloc() and released by mchars_free(). */
struct	mchars {
	struct ln	**htab;	/* bucket heads, indexed by first byte - PRINT_LO */
};

/* Shared lookup routine behind mchars_spec2cp() and mchars_spec2str(). */
static	const struct ln	 *find(const struct mchars *,
				const char *, size_t);
56d65f6f70SBen Gras 
57d65f6f70SBen Gras void
mchars_free(struct mchars * arg)5892395e9cSLionel Sambuc mchars_free(struct mchars *arg)
59d65f6f70SBen Gras {
60d65f6f70SBen Gras 
6192395e9cSLionel Sambuc 	free(arg->htab);
6292395e9cSLionel Sambuc 	free(arg);
63d65f6f70SBen Gras }
64d65f6f70SBen Gras 
6592395e9cSLionel Sambuc struct mchars *
mchars_alloc(void)6692395e9cSLionel Sambuc mchars_alloc(void)
67d65f6f70SBen Gras {
6892395e9cSLionel Sambuc 	struct mchars	 *tab;
69d65f6f70SBen Gras 	struct ln	**htab;
70d65f6f70SBen Gras 	struct ln	 *pp;
71d65f6f70SBen Gras 	int		  i, hash;
72d65f6f70SBen Gras 
73d65f6f70SBen Gras 	/*
74d65f6f70SBen Gras 	 * Constructs a very basic chaining hashtable.  The hash routine
75d65f6f70SBen Gras 	 * is simply the integral value of the first character.
7692395e9cSLionel Sambuc 	 * Subsequent entries are chained in the order they're processed.
77d65f6f70SBen Gras 	 */
78d65f6f70SBen Gras 
7992395e9cSLionel Sambuc 	tab = mandoc_malloc(sizeof(struct mchars));
80*0a6a1f1dSLionel Sambuc 	htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln *));
81d65f6f70SBen Gras 
82d65f6f70SBen Gras 	for (i = 0; i < LINES_MAX; i++) {
83d65f6f70SBen Gras 		hash = (int)lines[i].code[0] - PRINT_LO;
84d65f6f70SBen Gras 
85d65f6f70SBen Gras 		if (NULL == (pp = htab[hash])) {
86d65f6f70SBen Gras 			htab[hash] = &lines[i];
87d65f6f70SBen Gras 			continue;
88d65f6f70SBen Gras 		}
89d65f6f70SBen Gras 
90d65f6f70SBen Gras 		for ( ; pp->next; pp = pp->next)
91d65f6f70SBen Gras 			/* Scan ahead. */ ;
92d65f6f70SBen Gras 		pp->next = &lines[i];
93d65f6f70SBen Gras 	}
94d65f6f70SBen Gras 
95d65f6f70SBen Gras 	tab->htab = htab;
96d65f6f70SBen Gras 	return(tab);
97d65f6f70SBen Gras }
98d65f6f70SBen Gras 
99d65f6f70SBen Gras int
mchars_spec2cp(const struct mchars * arg,const char * p,size_t sz)10092395e9cSLionel Sambuc mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz)
101d65f6f70SBen Gras {
102d65f6f70SBen Gras 	const struct ln	*ln;
103d65f6f70SBen Gras 
10492395e9cSLionel Sambuc 	ln = find(arg, p, sz);
105d65f6f70SBen Gras 	if (NULL == ln)
106d65f6f70SBen Gras 		return(-1);
107d65f6f70SBen Gras 	return(ln->unicode);
108d65f6f70SBen Gras }
109d65f6f70SBen Gras 
/*
 * Convert the number in the sz bytes at p (handed to mandoc_strntoi()
 * with base 10) to a single character.
 * Returns '\0' when the conversion yields a negative value, or when the
 * value is not in 1..255 or is not printable according to isprint(3).
 */
char
mchars_num2char(const char *p, size_t sz)
{
	int		  code;

	code = mandoc_strntoi(p, sz, 10);

	/* Covers the negative (failed) result as well as zero. */
	if (code <= 0 || code >= 256)
		return('\0');
	if ( ! isprint(code))
		return('\0');

	return((char)code);
}
12092395e9cSLionel Sambuc 
/*
 * Convert the number in the sz bytes at p (handed to mandoc_strntoi()
 * with base 16) to a Unicode code point.
 *
 * Returns the code point, or '\0' (zero) when the conversion yields a
 * negative value, when the value is not above 0x80, when it exceeds
 * 0x10FFFF, or when it falls in the UTF-16 surrogate range
 * U+D800..U+DFFF, which holds no encodable characters.
 */
int
mchars_num2uc(const char *p, size_t sz)
{
	int               i;

	if ((i = mandoc_strntoi(p, sz, 16)) < 0)
		return('\0');

	/* Lower bound kept exactly as before: 0x80 itself is rejected. */
	if (i <= 0x80 || i > 0x10FFFF)
		return('\0');

	/* Surrogates are not Unicode scalar values. */
	if (i >= 0xD800 && i <= 0xDFFF)
		return('\0');

	/* FIXME: other bogus ranges (e.g. noncharacters) are not filtered. */
	return(i);
}
131d65f6f70SBen Gras 
132d65f6f70SBen Gras const char *
mchars_spec2str(const struct mchars * arg,const char * p,size_t sz,size_t * rsz)13392395e9cSLionel Sambuc mchars_spec2str(const struct mchars *arg,
13492395e9cSLionel Sambuc 		const char *p, size_t sz, size_t *rsz)
135d65f6f70SBen Gras {
136d65f6f70SBen Gras 	const struct ln	*ln;
137d65f6f70SBen Gras 
13892395e9cSLionel Sambuc 	ln = find(arg, p, sz);
13992395e9cSLionel Sambuc 	if (NULL == ln) {
14092395e9cSLionel Sambuc 		*rsz = 1;
141d65f6f70SBen Gras 		return(NULL);
14292395e9cSLionel Sambuc 	}
143d65f6f70SBen Gras 
144d65f6f70SBen Gras 	*rsz = strlen(ln->ascii);
145d65f6f70SBen Gras 	return(ln->ascii);
146d65f6f70SBen Gras }
147d65f6f70SBen Gras 
148d65f6f70SBen Gras static const struct ln *
find(const struct mchars * tab,const char * p,size_t sz)14992395e9cSLionel Sambuc find(const struct mchars *tab, const char *p, size_t sz)
150d65f6f70SBen Gras {
15192395e9cSLionel Sambuc 	const struct ln	 *pp;
152d65f6f70SBen Gras 	int		  hash;
153d65f6f70SBen Gras 
154d65f6f70SBen Gras 	assert(p);
155d65f6f70SBen Gras 
15692395e9cSLionel Sambuc 	if (0 == sz || p[0] < PRINT_LO || p[0] > PRINT_HI)
157d65f6f70SBen Gras 		return(NULL);
158d65f6f70SBen Gras 
159d65f6f70SBen Gras 	hash = (int)p[0] - PRINT_LO;
160d65f6f70SBen Gras 
16192395e9cSLionel Sambuc 	for (pp = tab->htab[hash]; pp; pp = pp->next)
16292395e9cSLionel Sambuc 		if (0 == strncmp(pp->code, p, sz) &&
16392395e9cSLionel Sambuc 				'\0' == pp->code[(int)sz])
164d65f6f70SBen Gras 			return(pp);
165d65f6f70SBen Gras 
166d65f6f70SBen Gras 	return(NULL);
167d65f6f70SBen Gras }
168