xref: /freebsd-src/lib/libc/string/wcscoll.c (revision a0ee8cc636cd5c2374ec44ca71226564ea0bca95)
1 /*-
2  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
3  * Copyright (c) 2002 Tim J. Robbins
4  * All rights reserved.
5  *
6  * Copyright (c) 2011 The FreeBSD Foundation
7  * All rights reserved.
8  * Portions of this software were developed by David Chisnall
9  * under sponsorship from the FreeBSD Foundation.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 
36 #include <errno.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <wchar.h>
40 #include "collate.h"
41 
42 int
43 wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
44 {
45 	int len1, len2, pri1, pri2, ret;
46 	wchar_t *tr1 = NULL, *tr2 = NULL;
47 	int direc, pass;
48 
49 	FIX_LOCALE(locale);
50 	struct xlocale_collate *table =
51 		(struct xlocale_collate*)locale->components[XLC_COLLATE];
52 
53 	if (table->__collate_load_error)
54 		/*
55 		 * Locale has no special collating order or could not be
56 		 * loaded, do a fast binary comparison.
57 		 */
58 		return (wcscmp(ws1, ws2));
59 
60 	ret = 0;
61 
62 	/*
63 	 * Once upon a time we had code to try to optimize this, but
64 	 * it turns out that you really can't make many assumptions
65 	 * safely.  You absolutely have to run this pass by pass,
66 	 * because some passes will be ignored for a given character,
67 	 * while others will not.  Simpler locales will benefit from
68 	 * having fewer passes, and most comparisions should resolve
69 	 * during the primary pass anyway.
70 	 *
71 	 * Note that we do one final extra pass at the end to pick
72 	 * up UNDEFINED elements.  There is special handling for them.
73 	 */
74 	for (pass = 0; pass <= table->info->directive_count; pass++) {
75 
76 		const int32_t *st1 = NULL;
77 		const int32_t *st2 = NULL;
78 		const wchar_t	*w1 = ws1;
79 		const wchar_t	*w2 = ws2;
80 		int check1, check2;
81 
82 		/* special pass for UNDEFINED */
83 		if (pass == table->info->directive_count) {
84 			direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
85 		} else {
86 			direc = table->info->directive[pass];
87 		}
88 
89 		if (direc & DIRECTIVE_BACKWARD) {
90 			wchar_t *bp, *fp, c;
91 			if ((tr1 = wcsdup(w1)) == NULL)
92 				goto fail;
93 			bp = tr1;
94 			fp = tr1 + wcslen(tr1) - 1;
95 			while (bp < fp) {
96 				c = *bp;
97 				*bp++ = *fp;
98 				*fp-- = c;
99 			}
100 			if ((tr2 = wcsdup(w2)) == NULL)
101 				goto fail;
102 			bp = tr2;
103 			fp = tr2 + wcslen(tr2) - 1;
104 			while (bp < fp) {
105 				c = *bp;
106 				*bp++ = *fp;
107 				*fp-- = c;
108 			}
109 			w1 = tr1;
110 			w2 = tr2;
111 		}
112 
113 		if (direc & DIRECTIVE_POSITION) {
114 			while (*w1 && *w2) {
115 				pri1 = pri2 = 0;
116 				check1 = check2 = 1;
117 				while ((pri1 == pri2) && (check1 || check2)) {
118 					if (check1) {
119 						_collate_lookup(table, w1, &len1,
120 						    &pri1, pass, &st1);
121 						if (pri1 < 0) {
122 							errno = EINVAL;
123 							goto fail;
124 						}
125 						if (!pri1) {
126 							pri1 = COLLATE_MAX_PRIORITY;
127 							st1 = NULL;
128 						}
129 						check1 = (st1 != NULL);
130 					}
131 					if (check2) {
132 						_collate_lookup(table, w2, &len2,
133 						    &pri2, pass, &st2);
134 						if (pri2 < 0) {
135 							errno = EINVAL;
136 							goto fail;
137 						}
138 						if (!pri2) {
139 							pri2 = COLLATE_MAX_PRIORITY;
140 							st2 = NULL;
141 						}
142 						check2 = (st2 != NULL);
143 					}
144 				}
145 				if (pri1 != pri2) {
146 					ret = pri1 - pri2;
147 					goto end;
148 				}
149 				w1 += len1;
150 				w2 += len2;
151 			}
152 		} else {
153 			while (*w1 && *w2) {
154 				pri1 = pri2 = 0;
155 				check1 = check2 = 1;
156 				while ((pri1 == pri2) && (check1 || check2)) {
157 					while (check1 && *w1) {
158 						_collate_lookup(table, w1,
159 						    &len1, &pri1, pass, &st1);
160 						if (pri1 > 0)
161 							break;
162 						if (pri1 < 0) {
163 							errno = EINVAL;
164 							goto fail;
165 						}
166 						st1 = NULL;
167 						w1 += 1;
168 					}
169 					check1 = (st1 != NULL);
170 					while (check2 && *w2) {
171 						_collate_lookup(table, w2,
172 						    &len2, &pri2, pass, &st2);
173 						if (pri2 > 0)
174 							break;
175 						if (pri2 < 0) {
176 							errno = EINVAL;
177 							goto fail;
178 						}
179 						st2 = NULL;
180 						w2 += 1;
181 					}
182 					check2 = (st2 != NULL);
183 					if (!pri1 || !pri2)
184 						break;
185 				}
186 				if (!pri1 || !pri2)
187 					break;
188 				if (pri1 != pri2) {
189 					ret = pri1 - pri2;
190 					goto end;
191 				}
192 				w1 += len1;
193 				w2 += len2;
194 			}
195 		}
196 		if (!*w1) {
197 			if (*w2) {
198 				ret = -(int)*w2;
199 				goto end;
200 			}
201 		} else {
202 			ret = *w1;
203 			goto end;
204 		}
205 	}
206 	ret = 0;
207 
208 end:
209 	free(tr1);
210 	free(tr2);
211 
212 	return (ret);
213 
214 fail:
215 	ret = wcscmp(ws1, ws2);
216 	goto end;
217 }
218 
219 int
220 wcscoll(const wchar_t *ws1, const wchar_t *ws2)
221 {
222 	return wcscoll_l(ws1, ws2, __get_locale());
223 }
224