1ca99cfddSTim J. Robbins /*-
2*4d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause
31de7b4b8SPedro F. Giffuni *
4ca99cfddSTim J. Robbins * Copyright (c) 2004 Tim J. Robbins.
5ca99cfddSTim J. Robbins * All rights reserved.
6ca99cfddSTim J. Robbins *
7ca99cfddSTim J. Robbins * Redistribution and use in source and binary forms, with or without
8ca99cfddSTim J. Robbins * modification, are permitted provided that the following conditions
9ca99cfddSTim J. Robbins * are met:
10ca99cfddSTim J. Robbins * 1. Redistributions of source code must retain the above copyright
11ca99cfddSTim J. Robbins * notice, this list of conditions and the following disclaimer.
12ca99cfddSTim J. Robbins * 2. Redistributions in binary form must reproduce the above copyright
13ca99cfddSTim J. Robbins * notice, this list of conditions and the following disclaimer in the
14ca99cfddSTim J. Robbins * documentation and/or other materials provided with the distribution.
15ca99cfddSTim J. Robbins *
16ca99cfddSTim J. Robbins * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17ca99cfddSTim J. Robbins * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18ca99cfddSTim J. Robbins * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ca99cfddSTim J. Robbins * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20ca99cfddSTim J. Robbins * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21ca99cfddSTim J. Robbins * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22ca99cfddSTim J. Robbins * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23ca99cfddSTim J. Robbins * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24ca99cfddSTim J. Robbins * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25ca99cfddSTim J. Robbins * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26ca99cfddSTim J. Robbins * SUCH DAMAGE.
27ca99cfddSTim J. Robbins */
28ca99cfddSTim J. Robbins /*
29ca99cfddSTim J. Robbins * "Set of characters" ADT implemented as a splay tree of extents, with
30ca99cfddSTim J. Robbins * a lookup table cache to simplify looking up the first bunch of
31ca99cfddSTim J. Robbins * characters (which are presumably more common than others).
32ca99cfddSTim J. Robbins */
33ca99cfddSTim J. Robbins
34ca99cfddSTim J. Robbins #include <sys/cdefs.h>
35ca99cfddSTim J. Robbins #include <assert.h>
36821df508SXin LI #include <stdbool.h>
37ca99cfddSTim J. Robbins #include <stdlib.h>
38821df508SXin LI #include <wchar.h>
39821df508SXin LI #include <wctype.h>
40ca99cfddSTim J. Robbins #include "cset.h"
41ca99cfddSTim J. Robbins
42ca99cfddSTim J. Robbins static struct csnode * cset_delete(struct csnode *, wchar_t);
43ca99cfddSTim J. Robbins static __inline int cset_rangecmp(struct csnode *, wchar_t);
44ca99cfddSTim J. Robbins static struct csnode * cset_splay(struct csnode *, wchar_t);
45ca99cfddSTim J. Robbins
46ca99cfddSTim J. Robbins /*
47ca99cfddSTim J. Robbins * cset_alloc --
48ca99cfddSTim J. Robbins * Allocate a set of characters.
49ca99cfddSTim J. Robbins */
50ca99cfddSTim J. Robbins struct cset *
cset_alloc(void)51ca99cfddSTim J. Robbins cset_alloc(void)
52ca99cfddSTim J. Robbins {
53ca99cfddSTim J. Robbins struct cset *cs;
54ca99cfddSTim J. Robbins
55ca99cfddSTim J. Robbins if ((cs = malloc(sizeof(*cs))) == NULL)
56ca99cfddSTim J. Robbins return (NULL);
57ca99cfddSTim J. Robbins cs->cs_root = NULL;
58ca99cfddSTim J. Robbins cs->cs_classes = NULL;
59ca99cfddSTim J. Robbins cs->cs_havecache = false;
609c8fd487STim J. Robbins cs->cs_invert = false;
61ca99cfddSTim J. Robbins return (cs);
62ca99cfddSTim J. Robbins }
63ca99cfddSTim J. Robbins
64ca99cfddSTim J. Robbins /*
65ca99cfddSTim J. Robbins * cset_add --
66ca99cfddSTim J. Robbins * Add a character to the set.
67ca99cfddSTim J. Robbins */
68ca99cfddSTim J. Robbins bool
cset_add(struct cset * cs,wchar_t ch)69ca99cfddSTim J. Robbins cset_add(struct cset *cs, wchar_t ch)
70ca99cfddSTim J. Robbins {
71ca99cfddSTim J. Robbins struct csnode *csn, *ncsn;
72ca99cfddSTim J. Robbins wchar_t oval;
73ca99cfddSTim J. Robbins
74ca99cfddSTim J. Robbins cs->cs_havecache = false;
75ca99cfddSTim J. Robbins
76ca99cfddSTim J. Robbins /*
77ca99cfddSTim J. Robbins * Inserting into empty tree; new item becomes the root.
78ca99cfddSTim J. Robbins */
79ca99cfddSTim J. Robbins if (cs->cs_root == NULL) {
80ca99cfddSTim J. Robbins csn = malloc(sizeof(*cs->cs_root));
81ca99cfddSTim J. Robbins if (csn == NULL)
82ca99cfddSTim J. Robbins return (false);
83ca99cfddSTim J. Robbins csn->csn_left = csn->csn_right = NULL;
84ca99cfddSTim J. Robbins csn->csn_min = csn->csn_max = ch;
85ca99cfddSTim J. Robbins cs->cs_root = csn;
86ca99cfddSTim J. Robbins return (true);
87ca99cfddSTim J. Robbins }
88ca99cfddSTim J. Robbins
89ca99cfddSTim J. Robbins /*
90ca99cfddSTim J. Robbins * Splay to check whether the item already exists, and otherwise,
91ca99cfddSTim J. Robbins * where we should put it.
92ca99cfddSTim J. Robbins */
93ca99cfddSTim J. Robbins csn = cs->cs_root = cset_splay(cs->cs_root, ch);
94ca99cfddSTim J. Robbins
95ca99cfddSTim J. Robbins /*
96cfab3bddSTim J. Robbins * Avoid adding duplicate nodes.
97ca99cfddSTim J. Robbins */
98ca99cfddSTim J. Robbins if (cset_rangecmp(csn, ch) == 0)
99ca99cfddSTim J. Robbins return (true);
100ca99cfddSTim J. Robbins
101ca99cfddSTim J. Robbins /*
102cfab3bddSTim J. Robbins * Allocate a new node and make it the new root.
103ca99cfddSTim J. Robbins */
104ca99cfddSTim J. Robbins ncsn = malloc(sizeof(*ncsn));
105ca99cfddSTim J. Robbins if (ncsn == NULL)
106ca99cfddSTim J. Robbins return (false);
107ca99cfddSTim J. Robbins ncsn->csn_min = ncsn->csn_max = ch;
108ca99cfddSTim J. Robbins if (cset_rangecmp(csn, ch) < 0) {
109ca99cfddSTim J. Robbins ncsn->csn_left = csn->csn_left;
110ca99cfddSTim J. Robbins ncsn->csn_right = csn;
111ca99cfddSTim J. Robbins csn->csn_left = NULL;
112ca99cfddSTim J. Robbins } else {
113ca99cfddSTim J. Robbins ncsn->csn_right = csn->csn_right;
114ca99cfddSTim J. Robbins ncsn->csn_left = csn;
115ca99cfddSTim J. Robbins csn->csn_right = NULL;
116ca99cfddSTim J. Robbins }
117ca99cfddSTim J. Robbins cs->cs_root = ncsn;
118ca99cfddSTim J. Robbins
119ca99cfddSTim J. Robbins /*
120cfab3bddSTim J. Robbins * Coalesce with left and right neighbours if possible.
121ca99cfddSTim J. Robbins */
122cfab3bddSTim J. Robbins if (ncsn->csn_left != NULL) {
123cfab3bddSTim J. Robbins ncsn->csn_left = cset_splay(ncsn->csn_left, ncsn->csn_min - 1);
124cfab3bddSTim J. Robbins if (ncsn->csn_left->csn_max == ncsn->csn_min - 1) {
125cfab3bddSTim J. Robbins oval = ncsn->csn_left->csn_min;
126cfab3bddSTim J. Robbins ncsn->csn_left = cset_delete(ncsn->csn_left,
127cfab3bddSTim J. Robbins ncsn->csn_left->csn_min);
128ca99cfddSTim J. Robbins ncsn->csn_min = oval;
129ca99cfddSTim J. Robbins }
130cfab3bddSTim J. Robbins }
131cfab3bddSTim J. Robbins if (ncsn->csn_right != NULL) {
132cfab3bddSTim J. Robbins ncsn->csn_right = cset_splay(ncsn->csn_right,
133cfab3bddSTim J. Robbins ncsn->csn_max + 1);
134cfab3bddSTim J. Robbins if (ncsn->csn_right->csn_min == ncsn->csn_max + 1) {
135cfab3bddSTim J. Robbins oval = ncsn->csn_right->csn_max;
136cfab3bddSTim J. Robbins ncsn->csn_right = cset_delete(ncsn->csn_right,
137cfab3bddSTim J. Robbins ncsn->csn_right->csn_min);
138ca99cfddSTim J. Robbins ncsn->csn_max = oval;
139ca99cfddSTim J. Robbins }
140cfab3bddSTim J. Robbins }
141ca99cfddSTim J. Robbins
142ca99cfddSTim J. Robbins return (true);
143ca99cfddSTim J. Robbins }
144ca99cfddSTim J. Robbins
145ca99cfddSTim J. Robbins /*
146ca99cfddSTim J. Robbins * cset_in_hard --
147ca99cfddSTim J. Robbins * Determine whether a character is in the set without using
148ca99cfddSTim J. Robbins * the cache.
149ca99cfddSTim J. Robbins */
150ca99cfddSTim J. Robbins bool
cset_in_hard(struct cset * cs,wchar_t ch)151ca99cfddSTim J. Robbins cset_in_hard(struct cset *cs, wchar_t ch)
152ca99cfddSTim J. Robbins {
153ca99cfddSTim J. Robbins struct csclass *csc;
154ca99cfddSTim J. Robbins
155ca99cfddSTim J. Robbins for (csc = cs->cs_classes; csc != NULL; csc = csc->csc_next)
15650af444aSEd Schouten if (csc->csc_invert ^ (iswctype(ch, csc->csc_type) != 0))
157ca99cfddSTim J. Robbins return (cs->cs_invert ^ true);
158ca99cfddSTim J. Robbins if (cs->cs_root != NULL) {
159ca99cfddSTim J. Robbins cs->cs_root = cset_splay(cs->cs_root, ch);
16050af444aSEd Schouten return (cs->cs_invert ^ (cset_rangecmp(cs->cs_root, ch) == 0));
161ca99cfddSTim J. Robbins }
162ca99cfddSTim J. Robbins return (cs->cs_invert ^ false);
163ca99cfddSTim J. Robbins }
164ca99cfddSTim J. Robbins
165ca99cfddSTim J. Robbins /*
166ca99cfddSTim J. Robbins * cset_cache --
167ca99cfddSTim J. Robbins * Update the cache.
168ca99cfddSTim J. Robbins */
169ca99cfddSTim J. Robbins void
cset_cache(struct cset * cs)170ca99cfddSTim J. Robbins cset_cache(struct cset *cs)
171ca99cfddSTim J. Robbins {
172ca99cfddSTim J. Robbins wchar_t i;
173ca99cfddSTim J. Robbins
174ca99cfddSTim J. Robbins for (i = 0; i < CS_CACHE_SIZE; i++)
175ca99cfddSTim J. Robbins cs->cs_cache[i] = cset_in_hard(cs, i);
176ca99cfddSTim J. Robbins
177ca99cfddSTim J. Robbins cs->cs_havecache = true;
178ca99cfddSTim J. Robbins }
179ca99cfddSTim J. Robbins
180ca99cfddSTim J. Robbins /*
181ca99cfddSTim J. Robbins * cset_invert --
182ca99cfddSTim J. Robbins * Invert the character set.
183ca99cfddSTim J. Robbins */
184ca99cfddSTim J. Robbins void
cset_invert(struct cset * cs)185ca99cfddSTim J. Robbins cset_invert(struct cset *cs)
186ca99cfddSTim J. Robbins {
187ca99cfddSTim J. Robbins
188ca99cfddSTim J. Robbins cs->cs_invert ^= true;
189ca99cfddSTim J. Robbins cs->cs_havecache = false;
190ca99cfddSTim J. Robbins }
191ca99cfddSTim J. Robbins
192ca99cfddSTim J. Robbins /*
193ca99cfddSTim J. Robbins * cset_addclass --
194ca99cfddSTim J. Robbins * Add a wctype()-style character class to the set, optionally
195ca99cfddSTim J. Robbins * inverting it.
196ca99cfddSTim J. Robbins */
197ca99cfddSTim J. Robbins bool
cset_addclass(struct cset * cs,wctype_t type,bool invert)198ca99cfddSTim J. Robbins cset_addclass(struct cset *cs, wctype_t type, bool invert)
199ca99cfddSTim J. Robbins {
200ca99cfddSTim J. Robbins struct csclass *csc;
201ca99cfddSTim J. Robbins
202ca99cfddSTim J. Robbins csc = malloc(sizeof(*csc));
203ca99cfddSTim J. Robbins if (csc == NULL)
204ca99cfddSTim J. Robbins return (false);
205ca99cfddSTim J. Robbins csc->csc_type = type;
206ca99cfddSTim J. Robbins csc->csc_invert = invert;
207ca99cfddSTim J. Robbins csc->csc_next = cs->cs_classes;
208ca99cfddSTim J. Robbins cs->cs_classes = csc;
209ca99cfddSTim J. Robbins cs->cs_havecache = false;
210ca99cfddSTim J. Robbins return (true);
211ca99cfddSTim J. Robbins }
212ca99cfddSTim J. Robbins
213ca99cfddSTim J. Robbins static __inline int
cset_rangecmp(struct csnode * t,wchar_t ch)214ca99cfddSTim J. Robbins cset_rangecmp(struct csnode *t, wchar_t ch)
215ca99cfddSTim J. Robbins {
216ca99cfddSTim J. Robbins
217ca99cfddSTim J. Robbins if (ch < t->csn_min)
218ca99cfddSTim J. Robbins return (-1);
219ca99cfddSTim J. Robbins if (ch > t->csn_max)
220ca99cfddSTim J. Robbins return (1);
221ca99cfddSTim J. Robbins return (0);
222ca99cfddSTim J. Robbins }
223ca99cfddSTim J. Robbins
224ca99cfddSTim J. Robbins static struct csnode *
cset_splay(struct csnode * t,wchar_t ch)225ca99cfddSTim J. Robbins cset_splay(struct csnode *t, wchar_t ch)
226ca99cfddSTim J. Robbins {
227ca99cfddSTim J. Robbins struct csnode N, *l, *r, *y;
228ca99cfddSTim J. Robbins
229ca99cfddSTim J. Robbins /*
230ca99cfddSTim J. Robbins * Based on public domain code from Sleator.
231ca99cfddSTim J. Robbins */
232ca99cfddSTim J. Robbins
233ca99cfddSTim J. Robbins assert(t != NULL);
234ca99cfddSTim J. Robbins
235ca99cfddSTim J. Robbins N.csn_left = N.csn_right = NULL;
236ca99cfddSTim J. Robbins l = r = &N;
237ca99cfddSTim J. Robbins for (;;) {
238ca99cfddSTim J. Robbins if (cset_rangecmp(t, ch) < 0) {
239ca99cfddSTim J. Robbins if (t->csn_left != NULL &&
240ca99cfddSTim J. Robbins cset_rangecmp(t->csn_left, ch) < 0) {
241ca99cfddSTim J. Robbins y = t->csn_left;
242ca99cfddSTim J. Robbins t->csn_left = y->csn_right;
243ca99cfddSTim J. Robbins y->csn_right = t;
244ca99cfddSTim J. Robbins t = y;
245ca99cfddSTim J. Robbins }
246ca99cfddSTim J. Robbins if (t->csn_left == NULL)
247ca99cfddSTim J. Robbins break;
248ca99cfddSTim J. Robbins r->csn_left = t;
249ca99cfddSTim J. Robbins r = t;
250ca99cfddSTim J. Robbins t = t->csn_left;
251ca99cfddSTim J. Robbins } else if (cset_rangecmp(t, ch) > 0) {
252ca99cfddSTim J. Robbins if (t->csn_right != NULL &&
253ca99cfddSTim J. Robbins cset_rangecmp(t->csn_right, ch) > 0) {
254ca99cfddSTim J. Robbins y = t->csn_right;
255ca99cfddSTim J. Robbins t->csn_right = y->csn_left;
256ca99cfddSTim J. Robbins y->csn_left = t;
257ca99cfddSTim J. Robbins t = y;
258ca99cfddSTim J. Robbins }
259ca99cfddSTim J. Robbins if (t->csn_right == NULL)
260ca99cfddSTim J. Robbins break;
261ca99cfddSTim J. Robbins l->csn_right = t;
262ca99cfddSTim J. Robbins l = t;
263ca99cfddSTim J. Robbins t = t->csn_right;
264ca99cfddSTim J. Robbins } else
265ca99cfddSTim J. Robbins break;
266ca99cfddSTim J. Robbins }
267ca99cfddSTim J. Robbins l->csn_right = t->csn_left;
268ca99cfddSTim J. Robbins r->csn_left = t->csn_right;
269ca99cfddSTim J. Robbins t->csn_left = N.csn_right;
270ca99cfddSTim J. Robbins t->csn_right = N.csn_left;
271ca99cfddSTim J. Robbins return (t);
272ca99cfddSTim J. Robbins }
273ca99cfddSTim J. Robbins
274ca99cfddSTim J. Robbins static struct csnode *
cset_delete(struct csnode * t,wchar_t ch)275ca99cfddSTim J. Robbins cset_delete(struct csnode *t, wchar_t ch)
276ca99cfddSTim J. Robbins {
277ca99cfddSTim J. Robbins struct csnode *x;
278ca99cfddSTim J. Robbins
279ca99cfddSTim J. Robbins assert(t != NULL);
280ca99cfddSTim J. Robbins t = cset_splay(t, ch);
281ca99cfddSTim J. Robbins assert(cset_rangecmp(t, ch) == 0);
282ca99cfddSTim J. Robbins if (t->csn_left == NULL)
283ca99cfddSTim J. Robbins x = t->csn_right;
284ca99cfddSTim J. Robbins else {
285ca99cfddSTim J. Robbins x = cset_splay(t->csn_left, ch);
286ca99cfddSTim J. Robbins x->csn_right = t->csn_right;
287ca99cfddSTim J. Robbins }
288ca99cfddSTim J. Robbins free(t);
289ca99cfddSTim J. Robbins return x;
290ca99cfddSTim J. Robbins }
291