xref: /freebsd-src/usr.bin/sort/bwstring.c (revision bd234c0d4c8256db7e5a1fdda9ef311c9e0080e4)
1c66bbc91SGabor Kovesdan /*-
24d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
31de7b4b8SPedro F. Giffuni  *
4c66bbc91SGabor Kovesdan  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5c859c6ddSGabor Kovesdan  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6c66bbc91SGabor Kovesdan  * All rights reserved.
7c66bbc91SGabor Kovesdan  *
8c66bbc91SGabor Kovesdan  * Redistribution and use in source and binary forms, with or without
9c66bbc91SGabor Kovesdan  * modification, are permitted provided that the following conditions
10c66bbc91SGabor Kovesdan  * are met:
11c66bbc91SGabor Kovesdan  * 1. Redistributions of source code must retain the above copyright
12c66bbc91SGabor Kovesdan  *    notice, this list of conditions and the following disclaimer.
13c66bbc91SGabor Kovesdan  * 2. Redistributions in binary form must reproduce the above copyright
14c66bbc91SGabor Kovesdan  *    notice, this list of conditions and the following disclaimer in the
15c66bbc91SGabor Kovesdan  *    documentation and/or other materials provided with the distribution.
16c66bbc91SGabor Kovesdan  *
17c66bbc91SGabor Kovesdan  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18c66bbc91SGabor Kovesdan  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19c66bbc91SGabor Kovesdan  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20c66bbc91SGabor Kovesdan  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21c66bbc91SGabor Kovesdan  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22c66bbc91SGabor Kovesdan  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23c66bbc91SGabor Kovesdan  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24c66bbc91SGabor Kovesdan  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25c66bbc91SGabor Kovesdan  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26c66bbc91SGabor Kovesdan  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27c66bbc91SGabor Kovesdan  * SUCH DAMAGE.
28c66bbc91SGabor Kovesdan  */
29c66bbc91SGabor Kovesdan 
30c66bbc91SGabor Kovesdan #include <sys/cdefs.h>
31c66bbc91SGabor Kovesdan #include <ctype.h>
32c66bbc91SGabor Kovesdan #include <errno.h>
33c66bbc91SGabor Kovesdan #include <err.h>
34c66bbc91SGabor Kovesdan #include <langinfo.h>
35c66bbc91SGabor Kovesdan #include <math.h>
36c66bbc91SGabor Kovesdan #include <stdlib.h>
37c66bbc91SGabor Kovesdan #include <string.h>
38c66bbc91SGabor Kovesdan #include <wchar.h>
39c66bbc91SGabor Kovesdan #include <wctype.h>
40c66bbc91SGabor Kovesdan 
41c66bbc91SGabor Kovesdan #include "bwstring.h"
42c66bbc91SGabor Kovesdan #include "sort.h"
43c66bbc91SGabor Kovesdan 
/* When set, single-byte strings are compared bytewise (see bwscoll()). */
bool byte_sort;

/* Upper-cased month names of the current locale, wide-character form. */
struct wmonth {
	wchar_t *mon;	/* full name (MON_n) */
	wchar_t *ab;	/* abbreviated name (ABMON_n) */
	wchar_t *alt;	/* standalone name (ALTMON_n); NULL without ALTMON */
};

/* Upper-cased month names of the current locale, single-byte form. */
struct cmonth {
	char *mon;	/* full name (MON_n) */
	char *ab;	/* abbreviated name (ABMON_n) */
	char *alt;	/* standalone name (ALTMON_n); NULL without ALTMON */
};

/* Month tables; allocated on first use by initialise_months(). */
static struct wmonth *wmonths;
static struct cmonth *cmonths;
603d44dce9SChristos Margiolis 
613d44dce9SChristos Margiolis static int
populate_cmonth(char ** field,const nl_item item,int idx)623d44dce9SChristos Margiolis populate_cmonth(char **field, const nl_item item, int idx)
633d44dce9SChristos Margiolis {
643d44dce9SChristos Margiolis 	char *tmp, *m;
653d44dce9SChristos Margiolis 	size_t i, len;
663d44dce9SChristos Margiolis 
673d44dce9SChristos Margiolis 	tmp = nl_langinfo(item);
683d44dce9SChristos Margiolis 	if (debug_sort)
693d44dce9SChristos Margiolis 		printf("month[%d]=%s\n", idx, tmp);
703d44dce9SChristos Margiolis 	if (*tmp == '\0')
713d44dce9SChristos Margiolis 		return (0);
723d44dce9SChristos Margiolis 	m = sort_strdup(tmp);
733d44dce9SChristos Margiolis 	len = strlen(tmp);
743d44dce9SChristos Margiolis 	for (i = 0; i < len; i++)
753d44dce9SChristos Margiolis 		m[i] = toupper(m[i]);
763d44dce9SChristos Margiolis 	*field = m;
773d44dce9SChristos Margiolis 
783d44dce9SChristos Margiolis 	return (1);
793d44dce9SChristos Margiolis }
803d44dce9SChristos Margiolis 
813d44dce9SChristos Margiolis static int
populate_wmonth(wchar_t ** field,const nl_item item,int idx)823d44dce9SChristos Margiolis populate_wmonth(wchar_t **field, const nl_item item, int idx)
833d44dce9SChristos Margiolis {
843d44dce9SChristos Margiolis 	wchar_t *m;
853d44dce9SChristos Margiolis 	char *tmp;
863d44dce9SChristos Margiolis 	size_t i, len;
873d44dce9SChristos Margiolis 
883d44dce9SChristos Margiolis 	tmp = nl_langinfo(item);
893d44dce9SChristos Margiolis 	if (debug_sort)
903d44dce9SChristos Margiolis 		printf("month[%d]=%s\n", idx, tmp);
913d44dce9SChristos Margiolis 	if (*tmp == '\0')
923d44dce9SChristos Margiolis 		return (0);
933d44dce9SChristos Margiolis 	len = strlen(tmp);
943d44dce9SChristos Margiolis 	m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1));
953d44dce9SChristos Margiolis 	if (mbstowcs(m, tmp, len) == ((size_t) - 1)) {
963d44dce9SChristos Margiolis 		sort_free(m);
973d44dce9SChristos Margiolis 		return (0);
983d44dce9SChristos Margiolis 	}
993d44dce9SChristos Margiolis 	m[len] = L'\0';
1003d44dce9SChristos Margiolis 	for (i = 0; i < len; i++)
1013d44dce9SChristos Margiolis 		m[i] = towupper(m[i]);
1023d44dce9SChristos Margiolis 	*field = m;
1033d44dce9SChristos Margiolis 
1043d44dce9SChristos Margiolis 	return (1);
1053d44dce9SChristos Margiolis }
106c66bbc91SGabor Kovesdan 
107c66bbc91SGabor Kovesdan void
initialise_months(void)108c66bbc91SGabor Kovesdan initialise_months(void)
109c66bbc91SGabor Kovesdan {
1103d44dce9SChristos Margiolis 	const nl_item mon_item[12] = { MON_1, MON_2, MON_3, MON_4,
1113d44dce9SChristos Margiolis 	    MON_5, MON_6, MON_7, MON_8, MON_9, MON_10,
1123d44dce9SChristos Margiolis 	    MON_11, MON_12 };
1133d44dce9SChristos Margiolis 	const nl_item ab_item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
114c66bbc91SGabor Kovesdan 	    ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10,
115c66bbc91SGabor Kovesdan 	    ABMON_11, ABMON_12 };
116*bd234c0dSWarner Losh #ifdef ALTMON_1
1173d44dce9SChristos Margiolis 	const nl_item alt_item[12] = { ALTMON_1, ALTMON_2, ALTMON_3, ALTMON_4,
1183d44dce9SChristos Margiolis 	    ALTMON_5, ALTMON_6, ALTMON_7, ALTMON_8, ALTMON_9, ALTMON_10,
1193d44dce9SChristos Margiolis 	    ALTMON_11, ALTMON_12 };
120*bd234c0dSWarner Losh #endif
1213d44dce9SChristos Margiolis 	int i;
122c66bbc91SGabor Kovesdan 
1233d44dce9SChristos Margiolis 	/*
1243d44dce9SChristos Margiolis 	 * Handle all possible month formats: abbrevation, full name,
1253d44dce9SChristos Margiolis 	 * standalone name (without case ending).
1263d44dce9SChristos Margiolis 	 */
12771ec05a2SCyril Zhang 	if (mb_cur_max == 1) {
128c66bbc91SGabor Kovesdan 		if (cmonths == NULL) {
1293d44dce9SChristos Margiolis 			cmonths = sort_malloc(sizeof(struct cmonth) * 12);
1303d44dce9SChristos Margiolis 			for (i = 0; i < 12; i++) {
1313d44dce9SChristos Margiolis 				if (!populate_cmonth(&cmonths[i].mon,
1323d44dce9SChristos Margiolis 				    mon_item[i], i))
133c66bbc91SGabor Kovesdan 					continue;
1343d44dce9SChristos Margiolis 				if (!populate_cmonth(&cmonths[i].ab,
1353d44dce9SChristos Margiolis 				    ab_item[i], i))
1363d44dce9SChristos Margiolis 					continue;
137*bd234c0dSWarner Losh #ifdef ALTMON_1
1383d44dce9SChristos Margiolis 				if (!populate_cmonth(&cmonths[i].alt,
1393d44dce9SChristos Margiolis 				    alt_item[i], i))
1403d44dce9SChristos Margiolis 					continue;
141*bd234c0dSWarner Losh #else
142*bd234c0dSWarner Losh 				cmonths[i].alt = NULL;
143*bd234c0dSWarner Losh #endif
144c66bbc91SGabor Kovesdan 			}
145c66bbc91SGabor Kovesdan 		}
146c66bbc91SGabor Kovesdan 
147c66bbc91SGabor Kovesdan 	} else {
148c66bbc91SGabor Kovesdan 		if (wmonths == NULL) {
1493d44dce9SChristos Margiolis 			wmonths = sort_malloc(sizeof(struct wmonth) * 12);
1503d44dce9SChristos Margiolis 			for (i = 0; i < 12; i++) {
1513d44dce9SChristos Margiolis 				if (!populate_wmonth(&wmonths[i].mon,
1523d44dce9SChristos Margiolis 				    mon_item[i], i))
153c66bbc91SGabor Kovesdan 					continue;
1543d44dce9SChristos Margiolis 				if (!populate_wmonth(&wmonths[i].ab,
1553d44dce9SChristos Margiolis 				    ab_item[i], i))
156c66bbc91SGabor Kovesdan 					continue;
157*bd234c0dSWarner Losh #ifdef ALTMON_1
1583d44dce9SChristos Margiolis 				if (!populate_wmonth(&wmonths[i].alt,
1593d44dce9SChristos Margiolis 				    alt_item[i], i))
1603d44dce9SChristos Margiolis 					continue;
161*bd234c0dSWarner Losh #else
162*bd234c0dSWarner Losh 				wmonths[i].alt = NULL;
163*bd234c0dSWarner Losh #endif
164c66bbc91SGabor Kovesdan 			}
165c66bbc91SGabor Kovesdan 		}
166c66bbc91SGabor Kovesdan 	}
167c66bbc91SGabor Kovesdan }
168c66bbc91SGabor Kovesdan 
/*
 * Compare two NUL-terminated wide-character strings using the collation
 * rules of the current locale.
 *
 * If wcscoll() reports EILSEQ (a character outside the collating
 * sequence), fall back to a plain code-value comparison with wcscmp().
 * wcscmp() is specified with no error conditions, so its result is
 * used directly.  errno is cleared before each call.
 *
 * Returns a value less than, equal to, or greater than zero.
 */
static int
wide_str_coll(const wchar_t *s1, const wchar_t *s2)
{
	int ret;

	errno = 0;
	ret = wcscoll(s1, s2);
	if (errno == EILSEQ) {
		errno = 0;
		ret = wcscmp(s1, s2);
	}
	return (ret);
}
198c66bbc91SGabor Kovesdan 
199c66bbc91SGabor Kovesdan /* counterparts of wcs functions */
200c66bbc91SGabor Kovesdan 
201c66bbc91SGabor Kovesdan void
bwsprintf(FILE * f,struct bwstring * bws,const char * prefix,const char * suffix)202c66bbc91SGabor Kovesdan bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix)
203c66bbc91SGabor Kovesdan {
204e5f71a07SPedro F. Giffuni 
20571ec05a2SCyril Zhang 	if (mb_cur_max == 1)
206d053fb22SAlex Richardson 		fprintf(f, "%s%s%s", prefix, bws->cdata.str, suffix);
207c66bbc91SGabor Kovesdan 	else
208d053fb22SAlex Richardson 		fprintf(f, "%s%S%s", prefix, bws->wdata.str, suffix);
209c66bbc91SGabor Kovesdan }
210c66bbc91SGabor Kovesdan 
bwsrawdata(const struct bwstring * bws)211c66bbc91SGabor Kovesdan const void* bwsrawdata(const struct bwstring *bws)
212c66bbc91SGabor Kovesdan {
213e5f71a07SPedro F. Giffuni 
214e9bfb50dSMark Johnston 	return (bws->wdata.str);
215c66bbc91SGabor Kovesdan }
216c66bbc91SGabor Kovesdan 
bwsrawlen(const struct bwstring * bws)217c66bbc91SGabor Kovesdan size_t bwsrawlen(const struct bwstring *bws)
218c66bbc91SGabor Kovesdan {
219e5f71a07SPedro F. Giffuni 
220d053fb22SAlex Richardson 	return ((mb_cur_max == 1) ? bws->cdata.len :
221d053fb22SAlex Richardson 	    SIZEOF_WCHAR_STRING(bws->wdata.len));
222c66bbc91SGabor Kovesdan }
223c66bbc91SGabor Kovesdan 
224c66bbc91SGabor Kovesdan size_t
bws_memsize(const struct bwstring * bws)225c66bbc91SGabor Kovesdan bws_memsize(const struct bwstring *bws)
226c66bbc91SGabor Kovesdan {
227e5f71a07SPedro F. Giffuni 
228d053fb22SAlex Richardson 	return ((mb_cur_max == 1) ?
229d053fb22SAlex Richardson 	    (bws->cdata.len + 2 + sizeof(struct bwstring)) :
230d053fb22SAlex Richardson 	    (SIZEOF_WCHAR_STRING(bws->wdata.len + 1) + sizeof(struct bwstring)));
231c66bbc91SGabor Kovesdan }
232c66bbc91SGabor Kovesdan 
233c66bbc91SGabor Kovesdan void
bws_setlen(struct bwstring * bws,size_t newlen)234c66bbc91SGabor Kovesdan bws_setlen(struct bwstring *bws, size_t newlen)
235c66bbc91SGabor Kovesdan {
236e5f71a07SPedro F. Giffuni 
237d053fb22SAlex Richardson 	if (mb_cur_max == 1 && bws && newlen != bws->cdata.len &&
238d053fb22SAlex Richardson 	    newlen <= bws->cdata.len) {
239d053fb22SAlex Richardson 		bws->cdata.len = newlen;
240d053fb22SAlex Richardson 		bws->cdata.str[newlen] = '\0';
241d053fb22SAlex Richardson 	} else if (bws && newlen != bws->wdata.len && newlen <= bws->wdata.len) {
242d053fb22SAlex Richardson 		bws->wdata.len = newlen;
243d053fb22SAlex Richardson 		bws->wdata.str[newlen] = L'\0';
244c66bbc91SGabor Kovesdan 	}
245c66bbc91SGabor Kovesdan }
246c66bbc91SGabor Kovesdan 
247c66bbc91SGabor Kovesdan /*
248c66bbc91SGabor Kovesdan  * Allocate a new binary string of specified size
249c66bbc91SGabor Kovesdan  */
250c66bbc91SGabor Kovesdan struct bwstring *
bwsalloc(size_t sz)251c66bbc91SGabor Kovesdan bwsalloc(size_t sz)
252c66bbc91SGabor Kovesdan {
253c66bbc91SGabor Kovesdan 	struct bwstring *ret;
254c66bbc91SGabor Kovesdan 
255d053fb22SAlex Richardson 	if (mb_cur_max == 1) {
256c66bbc91SGabor Kovesdan 		ret = sort_malloc(sizeof(struct bwstring) + 1 + sz);
257d053fb22SAlex Richardson 		ret->cdata.len = sz;
258d053fb22SAlex Richardson 		ret->cdata.str[sz] = '\0';
259d053fb22SAlex Richardson 	} else {
260d053fb22SAlex Richardson 		ret = sort_malloc(
261d053fb22SAlex Richardson 		    sizeof(struct bwstring) + SIZEOF_WCHAR_STRING(sz + 1));
262d053fb22SAlex Richardson 		ret->wdata.len = sz;
263d053fb22SAlex Richardson 		ret->wdata.str[sz] = L'\0';
264d053fb22SAlex Richardson 	}
265c66bbc91SGabor Kovesdan 
266c66bbc91SGabor Kovesdan 	return (ret);
267c66bbc91SGabor Kovesdan }
268c66bbc91SGabor Kovesdan 
269c66bbc91SGabor Kovesdan /*
270c66bbc91SGabor Kovesdan  * Create a copy of binary string.
271c66bbc91SGabor Kovesdan  * New string size equals the length of the old string.
272c66bbc91SGabor Kovesdan  */
273c66bbc91SGabor Kovesdan struct bwstring *
bwsdup(const struct bwstring * s)274c66bbc91SGabor Kovesdan bwsdup(const struct bwstring *s)
275c66bbc91SGabor Kovesdan {
276e5f71a07SPedro F. Giffuni 
277c66bbc91SGabor Kovesdan 	if (s == NULL)
278c66bbc91SGabor Kovesdan 		return (NULL);
279c66bbc91SGabor Kovesdan 	else {
280d053fb22SAlex Richardson 		struct bwstring *ret = bwsalloc(BWSLEN(s));
281c66bbc91SGabor Kovesdan 
28271ec05a2SCyril Zhang 		if (mb_cur_max == 1)
283d053fb22SAlex Richardson 			memcpy(ret->cdata.str, s->cdata.str, (s->cdata.len));
284c66bbc91SGabor Kovesdan 		else
285d053fb22SAlex Richardson 			memcpy(ret->wdata.str, s->wdata.str,
286d053fb22SAlex Richardson 			    SIZEOF_WCHAR_STRING(s->wdata.len));
287c66bbc91SGabor Kovesdan 
288c66bbc91SGabor Kovesdan 		return (ret);
289c66bbc91SGabor Kovesdan 	}
290c66bbc91SGabor Kovesdan }
291c66bbc91SGabor Kovesdan 
292c66bbc91SGabor Kovesdan /*
293bd0f80c6SPedro F. Giffuni  * Create a new binary string from a wide character buffer.
294c66bbc91SGabor Kovesdan  */
295c66bbc91SGabor Kovesdan struct bwstring *
bwssbdup(const wchar_t * str,size_t len)296c66bbc91SGabor Kovesdan bwssbdup(const wchar_t *str, size_t len)
297c66bbc91SGabor Kovesdan {
298e5f71a07SPedro F. Giffuni 
299c66bbc91SGabor Kovesdan 	if (str == NULL)
300c66bbc91SGabor Kovesdan 		return ((len == 0) ? bwsalloc(0) : NULL);
301c66bbc91SGabor Kovesdan 	else {
302c66bbc91SGabor Kovesdan 		struct bwstring *ret;
303c66bbc91SGabor Kovesdan 
304c66bbc91SGabor Kovesdan 		ret = bwsalloc(len);
305c66bbc91SGabor Kovesdan 
30671ec05a2SCyril Zhang 		if (mb_cur_max == 1)
307c66bbc91SGabor Kovesdan 			for (size_t i = 0; i < len; ++i)
308d053fb22SAlex Richardson 				ret->cdata.str[i] = (char)str[i];
309c66bbc91SGabor Kovesdan 		else
310d053fb22SAlex Richardson 			memcpy(ret->wdata.str, str, SIZEOF_WCHAR_STRING(len));
311c66bbc91SGabor Kovesdan 
312c66bbc91SGabor Kovesdan 		return (ret);
313c66bbc91SGabor Kovesdan 	}
314c66bbc91SGabor Kovesdan }
315c66bbc91SGabor Kovesdan 
316c66bbc91SGabor Kovesdan /*
317c66bbc91SGabor Kovesdan  * Create a new binary string from a raw binary buffer.
318c66bbc91SGabor Kovesdan  */
319c66bbc91SGabor Kovesdan struct bwstring *
bwscsbdup(const unsigned char * str,size_t len)320c66bbc91SGabor Kovesdan bwscsbdup(const unsigned char *str, size_t len)
321c66bbc91SGabor Kovesdan {
322c66bbc91SGabor Kovesdan 	struct bwstring *ret;
323c66bbc91SGabor Kovesdan 
324c66bbc91SGabor Kovesdan 	ret = bwsalloc(len);
325c66bbc91SGabor Kovesdan 
326c66bbc91SGabor Kovesdan 	if (str) {
32771ec05a2SCyril Zhang 		if (mb_cur_max == 1)
328d053fb22SAlex Richardson 			memcpy(ret->cdata.str, str, len);
329c66bbc91SGabor Kovesdan 		else {
330c66bbc91SGabor Kovesdan 			mbstate_t mbs;
331c66bbc91SGabor Kovesdan 			const char *s;
332c66bbc91SGabor Kovesdan 			size_t charlen, chars, cptr;
333c66bbc91SGabor Kovesdan 
334759a9a9dSPedro F. Giffuni 			chars = 0;
335c66bbc91SGabor Kovesdan 			cptr = 0;
336c66bbc91SGabor Kovesdan 			s = (const char *) str;
337c66bbc91SGabor Kovesdan 
338c66bbc91SGabor Kovesdan 			memset(&mbs, 0, sizeof(mbs));
339c66bbc91SGabor Kovesdan 
340c66bbc91SGabor Kovesdan 			while (cptr < len) {
34171ec05a2SCyril Zhang 				size_t n = mb_cur_max;
342c66bbc91SGabor Kovesdan 
343c66bbc91SGabor Kovesdan 				if (n > len - cptr)
344c66bbc91SGabor Kovesdan 					n = len - cptr;
345c66bbc91SGabor Kovesdan 				charlen = mbrlen(s + cptr, n, &mbs);
346c66bbc91SGabor Kovesdan 				switch (charlen) {
347c66bbc91SGabor Kovesdan 				case 0:
348c66bbc91SGabor Kovesdan 					/* FALLTHROUGH */
349c66bbc91SGabor Kovesdan 				case (size_t) -1:
350c66bbc91SGabor Kovesdan 					/* FALLTHROUGH */
351c66bbc91SGabor Kovesdan 				case (size_t) -2:
352d053fb22SAlex Richardson 					ret->wdata.str[chars++] =
353c66bbc91SGabor Kovesdan 					    (unsigned char) s[cptr];
354c66bbc91SGabor Kovesdan 					++cptr;
355c66bbc91SGabor Kovesdan 					break;
356c66bbc91SGabor Kovesdan 				default:
357d053fb22SAlex Richardson 					n = mbrtowc(ret->wdata.str + (chars++),
358c66bbc91SGabor Kovesdan 					    s + cptr, charlen, &mbs);
359c66bbc91SGabor Kovesdan 					if ((n == (size_t)-1) || (n == (size_t)-2))
360c66bbc91SGabor Kovesdan 						/* NOTREACHED */
361c66bbc91SGabor Kovesdan 						err(2, "mbrtowc error");
362c66bbc91SGabor Kovesdan 					cptr += charlen;
36380c7cc1cSPedro F. Giffuni 				}
364c66bbc91SGabor Kovesdan 			}
365c66bbc91SGabor Kovesdan 
366d053fb22SAlex Richardson 			ret->wdata.len = chars;
367d053fb22SAlex Richardson 			ret->wdata.str[ret->wdata.len] = L'\0';
368c66bbc91SGabor Kovesdan 		}
369c66bbc91SGabor Kovesdan 	}
370c66bbc91SGabor Kovesdan 	return (ret);
371c66bbc91SGabor Kovesdan }
372c66bbc91SGabor Kovesdan 
/*
 * Release a binary string.  A NULL argument is ignored.
 */
void
bwsfree(const struct bwstring *s)
{

	if (s != NULL)
		sort_free(s);
}
383c66bbc91SGabor Kovesdan 
384c66bbc91SGabor Kovesdan /*
385c66bbc91SGabor Kovesdan  * Copy content of src binary string to dst,
386c66bbc91SGabor Kovesdan  * with specified number of symbols to be copied.
387c66bbc91SGabor Kovesdan  * An offset value can be specified, from the start of src string.
388c66bbc91SGabor Kovesdan  * If the capacity of the dst string is not sufficient,
389c66bbc91SGabor Kovesdan  * then the data is truncated.
390c66bbc91SGabor Kovesdan  */
391c66bbc91SGabor Kovesdan struct bwstring *
bwsnocpy(struct bwstring * dst,const struct bwstring * src,size_t offset,size_t size)392c66bbc91SGabor Kovesdan bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset,
393c66bbc91SGabor Kovesdan     size_t size)
394c66bbc91SGabor Kovesdan {
395e5f71a07SPedro F. Giffuni 
396d053fb22SAlex Richardson 	if (offset >= BWSLEN(src)) {
397d053fb22SAlex Richardson 		bws_setlen(dst, 0);
398c66bbc91SGabor Kovesdan 	} else {
399d053fb22SAlex Richardson 		size_t nums = BWSLEN(src) - offset;
400c66bbc91SGabor Kovesdan 
401d053fb22SAlex Richardson 		if (nums > BWSLEN(dst))
402d053fb22SAlex Richardson 			nums = BWSLEN(dst);
403c66bbc91SGabor Kovesdan 		if (nums > size)
404c66bbc91SGabor Kovesdan 			nums = size;
40571ec05a2SCyril Zhang 		if (mb_cur_max == 1) {
406d053fb22SAlex Richardson 			memcpy(dst->cdata.str, src->cdata.str + offset, nums);
407d053fb22SAlex Richardson 			dst->cdata.len = nums;
408d053fb22SAlex Richardson 			dst->cdata.str[nums] = '\0';
409c66bbc91SGabor Kovesdan 		} else {
410d053fb22SAlex Richardson 			memcpy(dst->wdata.str, src->wdata.str + offset,
411c66bbc91SGabor Kovesdan 			    SIZEOF_WCHAR_STRING(nums));
412d053fb22SAlex Richardson 			dst->wdata.len = nums;
413d053fb22SAlex Richardson 			dst->wdata.str[nums] = L'\0';
414c66bbc91SGabor Kovesdan 		}
415c66bbc91SGabor Kovesdan 	}
416c66bbc91SGabor Kovesdan 	return (dst);
417c66bbc91SGabor Kovesdan }
418c66bbc91SGabor Kovesdan 
419c66bbc91SGabor Kovesdan /*
420c66bbc91SGabor Kovesdan  * Write binary string to the file.
421c66bbc91SGabor Kovesdan  * The output is ended either with '\n' (nl == true)
422c66bbc91SGabor Kovesdan  * or '\0' (nl == false).
423c66bbc91SGabor Kovesdan  */
424e8da8c74SGabor Kovesdan size_t
bwsfwrite(struct bwstring * bws,FILE * f,bool zero_ended)425c66bbc91SGabor Kovesdan bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended)
426c66bbc91SGabor Kovesdan {
427e5f71a07SPedro F. Giffuni 
42871ec05a2SCyril Zhang 	if (mb_cur_max == 1) {
429d053fb22SAlex Richardson 		size_t len = bws->cdata.len;
430c66bbc91SGabor Kovesdan 
431c66bbc91SGabor Kovesdan 		if (!zero_ended) {
432d053fb22SAlex Richardson 			bws->cdata.str[len] = '\n';
433c66bbc91SGabor Kovesdan 
434d053fb22SAlex Richardson 			if (fwrite(bws->cdata.str, len + 1, 1, f) < 1)
435c66bbc91SGabor Kovesdan 				err(2, NULL);
436c66bbc91SGabor Kovesdan 
437d053fb22SAlex Richardson 			bws->cdata.str[len] = '\0';
438d053fb22SAlex Richardson 		} else if (fwrite(bws->cdata.str, len + 1, 1, f) < 1)
439c66bbc91SGabor Kovesdan 			err(2, NULL);
440c66bbc91SGabor Kovesdan 
441c66bbc91SGabor Kovesdan 		return (len + 1);
442c66bbc91SGabor Kovesdan 
443c66bbc91SGabor Kovesdan 	} else {
444c66bbc91SGabor Kovesdan 		wchar_t eols;
445e8da8c74SGabor Kovesdan 		size_t printed = 0;
446c66bbc91SGabor Kovesdan 
447c66bbc91SGabor Kovesdan 		eols = zero_ended ? btowc('\0') : btowc('\n');
448c66bbc91SGabor Kovesdan 
449e8da8c74SGabor Kovesdan 		while (printed < BWSLEN(bws)) {
450d053fb22SAlex Richardson 			const wchar_t *s = bws->wdata.str + printed;
451c66bbc91SGabor Kovesdan 
452c66bbc91SGabor Kovesdan 			if (*s == L'\0') {
453c66bbc91SGabor Kovesdan 				int nums;
454c66bbc91SGabor Kovesdan 
455c66bbc91SGabor Kovesdan 				nums = fwprintf(f, L"%lc", *s);
456c66bbc91SGabor Kovesdan 
457c66bbc91SGabor Kovesdan 				if (nums != 1)
458c66bbc91SGabor Kovesdan 					err(2, NULL);
459c66bbc91SGabor Kovesdan 				++printed;
460c66bbc91SGabor Kovesdan 			} else {
461c66bbc91SGabor Kovesdan 				int nums;
462c66bbc91SGabor Kovesdan 
463c66bbc91SGabor Kovesdan 				nums = fwprintf(f, L"%ls", s);
464c66bbc91SGabor Kovesdan 
465c66bbc91SGabor Kovesdan 				if (nums < 1)
466c66bbc91SGabor Kovesdan 					err(2, NULL);
467c66bbc91SGabor Kovesdan 				printed += nums;
468c66bbc91SGabor Kovesdan 			}
469c66bbc91SGabor Kovesdan 		}
470c66bbc91SGabor Kovesdan 		fwprintf(f, L"%lc", eols);
471c66bbc91SGabor Kovesdan 		return (printed + 1);
472c66bbc91SGabor Kovesdan 	}
473c66bbc91SGabor Kovesdan }
474c66bbc91SGabor Kovesdan 
475c66bbc91SGabor Kovesdan int
bwsncmp(const struct bwstring * bws1,const struct bwstring * bws2,size_t offset,size_t len)476c66bbc91SGabor Kovesdan bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2,
477c66bbc91SGabor Kovesdan     size_t offset, size_t len)
478c66bbc91SGabor Kovesdan {
479c66bbc91SGabor Kovesdan 	size_t cmp_len, len1, len2;
480d053fb22SAlex Richardson 	int res;
481c66bbc91SGabor Kovesdan 
482d053fb22SAlex Richardson 	len1 = BWSLEN(bws1);
483d053fb22SAlex Richardson 	len2 = BWSLEN(bws2);
484c66bbc91SGabor Kovesdan 
485c66bbc91SGabor Kovesdan 	if (len1 <= offset) {
486c66bbc91SGabor Kovesdan 		return ((len2 <= offset) ? 0 : -1);
487c66bbc91SGabor Kovesdan 	} else {
488c66bbc91SGabor Kovesdan 		if (len2 <= offset)
489c66bbc91SGabor Kovesdan 			return (+1);
490c66bbc91SGabor Kovesdan 		else {
491c66bbc91SGabor Kovesdan 			len1 -= offset;
492c66bbc91SGabor Kovesdan 			len2 -= offset;
493c66bbc91SGabor Kovesdan 
494c66bbc91SGabor Kovesdan 			cmp_len = len1;
495c66bbc91SGabor Kovesdan 
496c66bbc91SGabor Kovesdan 			if (len2 < cmp_len)
497c66bbc91SGabor Kovesdan 				cmp_len = len2;
498c66bbc91SGabor Kovesdan 
499c66bbc91SGabor Kovesdan 			if (len < cmp_len)
500c66bbc91SGabor Kovesdan 				cmp_len = len;
501c66bbc91SGabor Kovesdan 
50271ec05a2SCyril Zhang 			if (mb_cur_max == 1) {
503d053fb22SAlex Richardson 				const char *s1, *s2;
504c66bbc91SGabor Kovesdan 
505d053fb22SAlex Richardson 				s1 = bws1->cdata.str + offset;
506d053fb22SAlex Richardson 				s2 = bws2->cdata.str + offset;
507c66bbc91SGabor Kovesdan 
508c66bbc91SGabor Kovesdan 				res = memcmp(s1, s2, cmp_len);
509c66bbc91SGabor Kovesdan 
510c66bbc91SGabor Kovesdan 			} else {
511c66bbc91SGabor Kovesdan 				const wchar_t *s1, *s2;
512c66bbc91SGabor Kovesdan 
513d053fb22SAlex Richardson 				s1 = bws1->wdata.str + offset;
514d053fb22SAlex Richardson 				s2 = bws2->wdata.str + offset;
515c66bbc91SGabor Kovesdan 
516c66bbc91SGabor Kovesdan 				res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len));
517c66bbc91SGabor Kovesdan 			}
518c66bbc91SGabor Kovesdan 		}
519c66bbc91SGabor Kovesdan 	}
520c66bbc91SGabor Kovesdan 
521c66bbc91SGabor Kovesdan 	if (res == 0) {
522c66bbc91SGabor Kovesdan 		if (len1 < cmp_len && len1 < len2)
523c66bbc91SGabor Kovesdan 			res = -1;
524c66bbc91SGabor Kovesdan 		else if (len2 < cmp_len && len2 < len1)
525c66bbc91SGabor Kovesdan 			res = +1;
526c66bbc91SGabor Kovesdan 	}
527c66bbc91SGabor Kovesdan 
528c66bbc91SGabor Kovesdan 	return (res);
529c66bbc91SGabor Kovesdan }
530c66bbc91SGabor Kovesdan 
/*
 * Compare two binary strings starting `offset' characters in.
 * The common prefix is compared with bwsncmp(); when it is equal the
 * shorter remainder sorts first.
 *
 * Returns a value less than, equal to, or greater than zero.
 */
int
bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
{
	size_t len1, len2, cmp_len;
	int res;

	len1 = BWSLEN(bws1);
	len2 = BWSLEN(bws2);

	/*
	 * Remaining length after the offset.  Clamp at zero: an unsigned
	 * subtraction would wrap for a string shorter than the offset and
	 * make two empty remainders of different strings compare unequal.
	 */
	len1 = (len1 > offset) ? len1 - offset : 0;
	len2 = (len2 > offset) ? len2 - offset : 0;

	cmp_len = len1;

	if (len2 < cmp_len)
		cmp_len = len2;

	res = bwsncmp(bws1, bws2, offset, cmp_len);

	if (res == 0) {
		if (len1 < len2)
			res = -1;
		else if (len2 < len1)
			res = +1;
	}

	return (res);
}
559c66bbc91SGabor Kovesdan 
560c66bbc91SGabor Kovesdan int
bws_iterator_cmp(bwstring_iterator iter1,bwstring_iterator iter2,size_t len)561c66bbc91SGabor Kovesdan bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len)
562c66bbc91SGabor Kovesdan {
563c66bbc91SGabor Kovesdan 	wchar_t c1, c2;
564d053fb22SAlex Richardson 	size_t i;
565c66bbc91SGabor Kovesdan 
566c66bbc91SGabor Kovesdan 	for (i = 0; i < len; ++i) {
567c66bbc91SGabor Kovesdan 		c1 = bws_get_iter_value(iter1);
568c66bbc91SGabor Kovesdan 		c2 = bws_get_iter_value(iter2);
569c66bbc91SGabor Kovesdan 		if (c1 != c2)
570c66bbc91SGabor Kovesdan 			return (c1 - c2);
571c66bbc91SGabor Kovesdan 		iter1 = bws_iterator_inc(iter1, 1);
572c66bbc91SGabor Kovesdan 		iter2 = bws_iterator_inc(iter2, 1);
573c66bbc91SGabor Kovesdan 	}
574c66bbc91SGabor Kovesdan 
575c66bbc91SGabor Kovesdan 	return (0);
576c66bbc91SGabor Kovesdan }
577c66bbc91SGabor Kovesdan 
578c66bbc91SGabor Kovesdan int
bwscoll(const struct bwstring * bws1,const struct bwstring * bws2,size_t offset)579c66bbc91SGabor Kovesdan bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
580c66bbc91SGabor Kovesdan {
581c66bbc91SGabor Kovesdan 	size_t len1, len2;
582c66bbc91SGabor Kovesdan 
583d053fb22SAlex Richardson 	len1 = BWSLEN(bws1);
584d053fb22SAlex Richardson 	len2 = BWSLEN(bws2);
585c66bbc91SGabor Kovesdan 
586c66bbc91SGabor Kovesdan 	if (len1 <= offset)
587c66bbc91SGabor Kovesdan 		return ((len2 <= offset) ? 0 : -1);
588c66bbc91SGabor Kovesdan 	else {
589c66bbc91SGabor Kovesdan 		if (len2 <= offset)
590c66bbc91SGabor Kovesdan 			return (+1);
591c66bbc91SGabor Kovesdan 		else {
592c66bbc91SGabor Kovesdan 			len1 -= offset;
593c66bbc91SGabor Kovesdan 			len2 -= offset;
594c66bbc91SGabor Kovesdan 
59571ec05a2SCyril Zhang 			if (mb_cur_max == 1) {
596d053fb22SAlex Richardson 				const char *s1, *s2;
597c66bbc91SGabor Kovesdan 
598d053fb22SAlex Richardson 				s1 = bws1->cdata.str + offset;
599d053fb22SAlex Richardson 				s2 = bws2->cdata.str + offset;
600c66bbc91SGabor Kovesdan 
601c66bbc91SGabor Kovesdan 				if (byte_sort) {
602d053fb22SAlex Richardson 					int res;
603c66bbc91SGabor Kovesdan 
604c66bbc91SGabor Kovesdan 					if (len1 > len2) {
605c66bbc91SGabor Kovesdan 						res = memcmp(s1, s2, len2);
606c66bbc91SGabor Kovesdan 						if (!res)
607c66bbc91SGabor Kovesdan 							res = +1;
608c66bbc91SGabor Kovesdan 					} else if (len1 < len2) {
609c66bbc91SGabor Kovesdan 						res = memcmp(s1, s2, len1);
610c66bbc91SGabor Kovesdan 						if (!res)
611c66bbc91SGabor Kovesdan 							res = -1;
612c66bbc91SGabor Kovesdan 					} else
613c66bbc91SGabor Kovesdan 						res = memcmp(s1, s2, len1);
614c66bbc91SGabor Kovesdan 
615c66bbc91SGabor Kovesdan 					return (res);
616c66bbc91SGabor Kovesdan 
617c66bbc91SGabor Kovesdan 				} else {
618d053fb22SAlex Richardson 					int res;
619c66bbc91SGabor Kovesdan 					size_t i, maxlen;
620c66bbc91SGabor Kovesdan 
621c66bbc91SGabor Kovesdan 					i = 0;
622c66bbc91SGabor Kovesdan 					maxlen = len1;
623c66bbc91SGabor Kovesdan 
624c66bbc91SGabor Kovesdan 					if (maxlen > len2)
625c66bbc91SGabor Kovesdan 						maxlen = len2;
626c66bbc91SGabor Kovesdan 
627c66bbc91SGabor Kovesdan 					while (i < maxlen) {
628c66bbc91SGabor Kovesdan 						/* goto next non-zero part: */
629c66bbc91SGabor Kovesdan 						while ((i < maxlen) &&
630c66bbc91SGabor Kovesdan 						    !s1[i] && !s2[i])
631c66bbc91SGabor Kovesdan 							++i;
632c66bbc91SGabor Kovesdan 
633c66bbc91SGabor Kovesdan 						if (i >= maxlen)
634c66bbc91SGabor Kovesdan 							break;
635c66bbc91SGabor Kovesdan 
636c66bbc91SGabor Kovesdan 						if (s1[i] == 0) {
637c66bbc91SGabor Kovesdan 							if (s2[i] == 0)
638c66bbc91SGabor Kovesdan 								/* NOTREACHED */
639c66bbc91SGabor Kovesdan 								err(2, "bwscoll error 01");
640c66bbc91SGabor Kovesdan 							else
641c66bbc91SGabor Kovesdan 								return (-1);
642c66bbc91SGabor Kovesdan 						} else if (s2[i] == 0)
643c66bbc91SGabor Kovesdan 							return (+1);
644c66bbc91SGabor Kovesdan 
645e8da8c74SGabor Kovesdan 						res = strcoll((const char*)(s1 + i), (const char*)(s2 + i));
646c66bbc91SGabor Kovesdan 						if (res)
647c66bbc91SGabor Kovesdan 							return (res);
648c66bbc91SGabor Kovesdan 
649c66bbc91SGabor Kovesdan 						while ((i < maxlen) &&
650c66bbc91SGabor Kovesdan 						    s1[i] && s2[i])
651c66bbc91SGabor Kovesdan 							++i;
652c66bbc91SGabor Kovesdan 
653c66bbc91SGabor Kovesdan 						if (i >= maxlen)
654c66bbc91SGabor Kovesdan 							break;
655c66bbc91SGabor Kovesdan 
656c66bbc91SGabor Kovesdan 						if (s1[i] == 0) {
657c66bbc91SGabor Kovesdan 							if (s2[i] == 0) {
658c66bbc91SGabor Kovesdan 								++i;
659c66bbc91SGabor Kovesdan 								continue;
660c66bbc91SGabor Kovesdan 							} else
661c66bbc91SGabor Kovesdan 								return (-1);
662c66bbc91SGabor Kovesdan 						} else if (s2[i] == 0)
663c66bbc91SGabor Kovesdan 							return (+1);
664c66bbc91SGabor Kovesdan 						else
665c66bbc91SGabor Kovesdan 							/* NOTREACHED */
666c66bbc91SGabor Kovesdan 							err(2, "bwscoll error 02");
667c66bbc91SGabor Kovesdan 					}
668c66bbc91SGabor Kovesdan 
669c66bbc91SGabor Kovesdan 					if (len1 < len2)
670c66bbc91SGabor Kovesdan 						return (-1);
671c66bbc91SGabor Kovesdan 					else if (len1 > len2)
672c66bbc91SGabor Kovesdan 						return (+1);
673c66bbc91SGabor Kovesdan 
674c66bbc91SGabor Kovesdan 					return (0);
675c66bbc91SGabor Kovesdan 				}
676c66bbc91SGabor Kovesdan 			} else {
677c66bbc91SGabor Kovesdan 				const wchar_t *s1, *s2;
678c66bbc91SGabor Kovesdan 				size_t i, maxlen;
679d053fb22SAlex Richardson 				int res;
680c66bbc91SGabor Kovesdan 
681d053fb22SAlex Richardson 				s1 = bws1->wdata.str + offset;
682d053fb22SAlex Richardson 				s2 = bws2->wdata.str + offset;
683c66bbc91SGabor Kovesdan 
684c66bbc91SGabor Kovesdan 				i = 0;
685c66bbc91SGabor Kovesdan 				maxlen = len1;
686c66bbc91SGabor Kovesdan 
687c66bbc91SGabor Kovesdan 				if (maxlen > len2)
688c66bbc91SGabor Kovesdan 					maxlen = len2;
689c66bbc91SGabor Kovesdan 
690c66bbc91SGabor Kovesdan 				while (i < maxlen) {
691c66bbc91SGabor Kovesdan 
692c66bbc91SGabor Kovesdan 					/* goto next non-zero part: */
693c66bbc91SGabor Kovesdan 					while ((i < maxlen) &&
694c66bbc91SGabor Kovesdan 					    !s1[i] && !s2[i])
695c66bbc91SGabor Kovesdan 						++i;
696c66bbc91SGabor Kovesdan 
697c66bbc91SGabor Kovesdan 					if (i >= maxlen)
698c66bbc91SGabor Kovesdan 						break;
699c66bbc91SGabor Kovesdan 
700c66bbc91SGabor Kovesdan 					if (s1[i] == 0) {
701c66bbc91SGabor Kovesdan 						if (s2[i] == 0)
702c66bbc91SGabor Kovesdan 							/* NOTREACHED */
703c66bbc91SGabor Kovesdan 							err(2, "bwscoll error 1");
704c66bbc91SGabor Kovesdan 						else
705c66bbc91SGabor Kovesdan 							return (-1);
706c66bbc91SGabor Kovesdan 					} else if (s2[i] == 0)
707c66bbc91SGabor Kovesdan 						return (+1);
708c66bbc91SGabor Kovesdan 
709c66bbc91SGabor Kovesdan 					res = wide_str_coll(s1 + i, s2 + i);
710c66bbc91SGabor Kovesdan 					if (res)
711c66bbc91SGabor Kovesdan 						return (res);
712c66bbc91SGabor Kovesdan 
713c66bbc91SGabor Kovesdan 					while ((i < maxlen) && s1[i] && s2[i])
714c66bbc91SGabor Kovesdan 						++i;
715c66bbc91SGabor Kovesdan 
716c66bbc91SGabor Kovesdan 					if (i >= maxlen)
717c66bbc91SGabor Kovesdan 						break;
718c66bbc91SGabor Kovesdan 
719c66bbc91SGabor Kovesdan 					if (s1[i] == 0) {
720c66bbc91SGabor Kovesdan 						if (s2[i] == 0) {
721c66bbc91SGabor Kovesdan 							++i;
722c66bbc91SGabor Kovesdan 							continue;
723c66bbc91SGabor Kovesdan 						} else
724c66bbc91SGabor Kovesdan 							return (-1);
725c66bbc91SGabor Kovesdan 					} else if (s2[i] == 0)
726c66bbc91SGabor Kovesdan 						return (+1);
727c66bbc91SGabor Kovesdan 					else
728c66bbc91SGabor Kovesdan 						/* NOTREACHED */
729c66bbc91SGabor Kovesdan 						err(2, "bwscoll error 2");
730c66bbc91SGabor Kovesdan 				}
731c66bbc91SGabor Kovesdan 
732c66bbc91SGabor Kovesdan 				if (len1 < len2)
733c66bbc91SGabor Kovesdan 					return (-1);
734c66bbc91SGabor Kovesdan 				else if (len1 > len2)
735c66bbc91SGabor Kovesdan 					return (+1);
736c66bbc91SGabor Kovesdan 
737c66bbc91SGabor Kovesdan 				return (0);
738c66bbc91SGabor Kovesdan 			}
739c66bbc91SGabor Kovesdan 		}
740c66bbc91SGabor Kovesdan 	}
741c66bbc91SGabor Kovesdan }
742c66bbc91SGabor Kovesdan 
743c66bbc91SGabor Kovesdan /*
744c66bbc91SGabor Kovesdan  * Correction of the system API
745c66bbc91SGabor Kovesdan  */
746c66bbc91SGabor Kovesdan double
bwstod(struct bwstring * s0,bool * empty)747c66bbc91SGabor Kovesdan bwstod(struct bwstring *s0, bool *empty)
748c66bbc91SGabor Kovesdan {
749d053fb22SAlex Richardson 	double ret;
750c66bbc91SGabor Kovesdan 
75171ec05a2SCyril Zhang 	if (mb_cur_max == 1) {
752d053fb22SAlex Richardson 		char *end, *s;
753c66bbc91SGabor Kovesdan 		char *ep;
754c66bbc91SGabor Kovesdan 
755d053fb22SAlex Richardson 		s = s0->cdata.str;
756d053fb22SAlex Richardson 		end = s + s0->cdata.len;
757c66bbc91SGabor Kovesdan 		ep = NULL;
758c66bbc91SGabor Kovesdan 
759c66bbc91SGabor Kovesdan 		while (isblank(*s) && s < end)
760c66bbc91SGabor Kovesdan 			++s;
761c66bbc91SGabor Kovesdan 
762c66bbc91SGabor Kovesdan 		if (!isprint(*s)) {
763c66bbc91SGabor Kovesdan 			*empty = true;
764c66bbc91SGabor Kovesdan 			return (0);
765c66bbc91SGabor Kovesdan 		}
766c66bbc91SGabor Kovesdan 
767e8da8c74SGabor Kovesdan 		ret = strtod((char*)s, &ep);
768d053fb22SAlex Richardson 		if (ep == s) {
769c66bbc91SGabor Kovesdan 			*empty = true;
770c66bbc91SGabor Kovesdan 			return (0);
771c66bbc91SGabor Kovesdan 		}
772c66bbc91SGabor Kovesdan 	} else {
773c66bbc91SGabor Kovesdan 		wchar_t *end, *ep, *s;
774c66bbc91SGabor Kovesdan 
775d053fb22SAlex Richardson 		s = s0->wdata.str;
776d053fb22SAlex Richardson 		end = s + s0->wdata.len;
777c66bbc91SGabor Kovesdan 		ep = NULL;
778c66bbc91SGabor Kovesdan 
779c66bbc91SGabor Kovesdan 		while (iswblank(*s) && s < end)
780c66bbc91SGabor Kovesdan 			++s;
781c66bbc91SGabor Kovesdan 
782c66bbc91SGabor Kovesdan 		if (!iswprint(*s)) {
783c66bbc91SGabor Kovesdan 			*empty = true;
784c66bbc91SGabor Kovesdan 			return (0);
785c66bbc91SGabor Kovesdan 		}
786c66bbc91SGabor Kovesdan 
787c66bbc91SGabor Kovesdan 		ret = wcstod(s, &ep);
788c66bbc91SGabor Kovesdan 		if (ep == s) {
789c66bbc91SGabor Kovesdan 			*empty = true;
790c66bbc91SGabor Kovesdan 			return (0);
791c66bbc91SGabor Kovesdan 		}
792c66bbc91SGabor Kovesdan 	}
793c66bbc91SGabor Kovesdan 
794c66bbc91SGabor Kovesdan 	*empty = false;
795c66bbc91SGabor Kovesdan 	return (ret);
796c66bbc91SGabor Kovesdan }
797c66bbc91SGabor Kovesdan 
798c66bbc91SGabor Kovesdan /*
799c66bbc91SGabor Kovesdan  * A helper function for monthcoll.  If a line matches
800c66bbc91SGabor Kovesdan  * a month name, it returns (number of the month - 1),
801c66bbc91SGabor Kovesdan  * while if there is no match, it just return -1.
802c66bbc91SGabor Kovesdan  */
803c66bbc91SGabor Kovesdan 
804c66bbc91SGabor Kovesdan int
bws_month_score(const struct bwstring * s0)805c66bbc91SGabor Kovesdan bws_month_score(const struct bwstring *s0)
806c66bbc91SGabor Kovesdan {
807e5f71a07SPedro F. Giffuni 
80871ec05a2SCyril Zhang 	if (mb_cur_max == 1) {
809d053fb22SAlex Richardson 		const char *end, *s;
810c66bbc91SGabor Kovesdan 
811d053fb22SAlex Richardson 		s = s0->cdata.str;
812d053fb22SAlex Richardson 		end = s + s0->cdata.len;
813c66bbc91SGabor Kovesdan 
814c66bbc91SGabor Kovesdan 		while (isblank(*s) && s < end)
815c66bbc91SGabor Kovesdan 			++s;
816c66bbc91SGabor Kovesdan 
817c66bbc91SGabor Kovesdan 		for (int i = 11; i >= 0; --i) {
8183d44dce9SChristos Margiolis 			if (cmonths[i].mon && (s == strstr(s, cmonths[i].mon)))
8193d44dce9SChristos Margiolis 				return (i);
8203d44dce9SChristos Margiolis 			if (cmonths[i].ab && (s == strstr(s, cmonths[i].ab)))
8213d44dce9SChristos Margiolis 				return (i);
8223d44dce9SChristos Margiolis 			if (cmonths[i].alt && (s == strstr(s, cmonths[i].alt)))
823c66bbc91SGabor Kovesdan 				return (i);
824c66bbc91SGabor Kovesdan 		}
825c66bbc91SGabor Kovesdan 
826c66bbc91SGabor Kovesdan 	} else {
827c66bbc91SGabor Kovesdan 		const wchar_t *end, *s;
828c66bbc91SGabor Kovesdan 
829d053fb22SAlex Richardson 		s = s0->wdata.str;
830d053fb22SAlex Richardson 		end = s + s0->wdata.len;
831c66bbc91SGabor Kovesdan 
832c66bbc91SGabor Kovesdan 		while (iswblank(*s) && s < end)
833c66bbc91SGabor Kovesdan 			++s;
834c66bbc91SGabor Kovesdan 
835c66bbc91SGabor Kovesdan 		for (int i = 11; i >= 0; --i) {
8363d44dce9SChristos Margiolis 			if (wmonths[i].ab && (s == wcsstr(s, wmonths[i].ab)))
8373d44dce9SChristos Margiolis 				return (i);
8383d44dce9SChristos Margiolis 			if (wmonths[i].mon && (s == wcsstr(s, wmonths[i].mon)))
8393d44dce9SChristos Margiolis 				return (i);
8403d44dce9SChristos Margiolis 			if (wmonths[i].alt && (s == wcsstr(s, wmonths[i].alt)))
841c66bbc91SGabor Kovesdan 				return (i);
842c66bbc91SGabor Kovesdan 		}
843c66bbc91SGabor Kovesdan 	}
844c66bbc91SGabor Kovesdan 
845c66bbc91SGabor Kovesdan 	return (-1);
846c66bbc91SGabor Kovesdan }
847c66bbc91SGabor Kovesdan 
848c66bbc91SGabor Kovesdan /*
849c66bbc91SGabor Kovesdan  * Rips out leading blanks (-b).
850c66bbc91SGabor Kovesdan  */
851c66bbc91SGabor Kovesdan struct bwstring *
ignore_leading_blanks(struct bwstring * str)852c66bbc91SGabor Kovesdan ignore_leading_blanks(struct bwstring *str)
853c66bbc91SGabor Kovesdan {
854c66bbc91SGabor Kovesdan 
85571ec05a2SCyril Zhang 	if (mb_cur_max == 1) {
856d053fb22SAlex Richardson 		char *dst, *end, *src;
857c66bbc91SGabor Kovesdan 
858d053fb22SAlex Richardson 		src = str->cdata.str;
859c66bbc91SGabor Kovesdan 		dst = src;
860d053fb22SAlex Richardson 		end = src + str->cdata.len;
861c66bbc91SGabor Kovesdan 
862c66bbc91SGabor Kovesdan 		while (src < end && isblank(*src))
863c66bbc91SGabor Kovesdan 			++src;
864c66bbc91SGabor Kovesdan 
865c66bbc91SGabor Kovesdan 		if (src != dst) {
866c66bbc91SGabor Kovesdan 			size_t newlen;
867c66bbc91SGabor Kovesdan 
868c66bbc91SGabor Kovesdan 			newlen = BWSLEN(str) - (src - dst);
869c66bbc91SGabor Kovesdan 
870c66bbc91SGabor Kovesdan 			while (src < end) {
871c66bbc91SGabor Kovesdan 				*dst = *src;
872c66bbc91SGabor Kovesdan 				++dst;
873c66bbc91SGabor Kovesdan 				++src;
874c66bbc91SGabor Kovesdan 			}
875c66bbc91SGabor Kovesdan 			bws_setlen(str, newlen);
876c66bbc91SGabor Kovesdan 		}
877c66bbc91SGabor Kovesdan 	} else {
878c66bbc91SGabor Kovesdan 		wchar_t *dst, *end, *src;
879c66bbc91SGabor Kovesdan 
880d053fb22SAlex Richardson 		src = str->wdata.str;
881c66bbc91SGabor Kovesdan 		dst = src;
882d053fb22SAlex Richardson 		end = src + str->wdata.len;
883c66bbc91SGabor Kovesdan 
884c66bbc91SGabor Kovesdan 		while (src < end && iswblank(*src))
885c66bbc91SGabor Kovesdan 			++src;
886c66bbc91SGabor Kovesdan 
887c66bbc91SGabor Kovesdan 		if (src != dst) {
888c66bbc91SGabor Kovesdan 
889c66bbc91SGabor Kovesdan 			size_t newlen = BWSLEN(str) - (src - dst);
890c66bbc91SGabor Kovesdan 
891c66bbc91SGabor Kovesdan 			while (src < end) {
892c66bbc91SGabor Kovesdan 				*dst = *src;
893c66bbc91SGabor Kovesdan 				++dst;
894c66bbc91SGabor Kovesdan 				++src;
895c66bbc91SGabor Kovesdan 			}
896c66bbc91SGabor Kovesdan 			bws_setlen(str, newlen);
897c66bbc91SGabor Kovesdan 
898c66bbc91SGabor Kovesdan 		}
899c66bbc91SGabor Kovesdan 	}
900c66bbc91SGabor Kovesdan 	return (str);
901c66bbc91SGabor Kovesdan }
902c66bbc91SGabor Kovesdan 
903c66bbc91SGabor Kovesdan /*
904c66bbc91SGabor Kovesdan  * Rips out nonprinting characters (-i).
905c66bbc91SGabor Kovesdan  */
906c66bbc91SGabor Kovesdan struct bwstring *
ignore_nonprinting(struct bwstring * str)907c66bbc91SGabor Kovesdan ignore_nonprinting(struct bwstring *str)
908c66bbc91SGabor Kovesdan {
909d053fb22SAlex Richardson 	size_t newlen = BWSLEN(str);
910c66bbc91SGabor Kovesdan 
91171ec05a2SCyril Zhang 	if (mb_cur_max == 1) {
912d053fb22SAlex Richardson 		char *dst, *end, *src;
913d053fb22SAlex Richardson 		char c;
914c66bbc91SGabor Kovesdan 
915d053fb22SAlex Richardson 		src = str->cdata.str;
916c66bbc91SGabor Kovesdan 		dst = src;
917d053fb22SAlex Richardson 		end = src + str->cdata.len;
918c66bbc91SGabor Kovesdan 
919c66bbc91SGabor Kovesdan 		while (src < end) {
920c66bbc91SGabor Kovesdan 			c = *src;
921c66bbc91SGabor Kovesdan 			if (isprint(c)) {
922c66bbc91SGabor Kovesdan 				*dst = c;
923c66bbc91SGabor Kovesdan 				++dst;
924c66bbc91SGabor Kovesdan 				++src;
925c66bbc91SGabor Kovesdan 			} else {
926c66bbc91SGabor Kovesdan 				++src;
927c66bbc91SGabor Kovesdan 				--newlen;
928c66bbc91SGabor Kovesdan 			}
929c66bbc91SGabor Kovesdan 		}
930c66bbc91SGabor Kovesdan 	} else {
931c66bbc91SGabor Kovesdan 		wchar_t *dst, *end, *src;
932c66bbc91SGabor Kovesdan 		wchar_t c;
933c66bbc91SGabor Kovesdan 
934d053fb22SAlex Richardson 		src = str->wdata.str;
935c66bbc91SGabor Kovesdan 		dst = src;
936d053fb22SAlex Richardson 		end = src + str->wdata.len;
937c66bbc91SGabor Kovesdan 
938c66bbc91SGabor Kovesdan 		while (src < end) {
939c66bbc91SGabor Kovesdan 			c = *src;
940c66bbc91SGabor Kovesdan 			if (iswprint(c)) {
941c66bbc91SGabor Kovesdan 				*dst = c;
942c66bbc91SGabor Kovesdan 				++dst;
943c66bbc91SGabor Kovesdan 				++src;
944c66bbc91SGabor Kovesdan 			} else {
945c66bbc91SGabor Kovesdan 				++src;
946c66bbc91SGabor Kovesdan 				--newlen;
947c66bbc91SGabor Kovesdan 			}
948c66bbc91SGabor Kovesdan 		}
949c66bbc91SGabor Kovesdan 	}
950c66bbc91SGabor Kovesdan 	bws_setlen(str, newlen);
951c66bbc91SGabor Kovesdan 
952c66bbc91SGabor Kovesdan 	return (str);
953c66bbc91SGabor Kovesdan }
954c66bbc91SGabor Kovesdan 
955c66bbc91SGabor Kovesdan /*
956c66bbc91SGabor Kovesdan  * Rips out any characters that are not alphanumeric characters
957c66bbc91SGabor Kovesdan  * nor blanks (-d).
958c66bbc91SGabor Kovesdan  */
959c66bbc91SGabor Kovesdan struct bwstring *
dictionary_order(struct bwstring * str)960c66bbc91SGabor Kovesdan dictionary_order(struct bwstring *str)
961c66bbc91SGabor Kovesdan {
962d053fb22SAlex Richardson 	size_t newlen = BWSLEN(str);
963c66bbc91SGabor Kovesdan 
96471ec05a2SCyril Zhang 	if (mb_cur_max == 1) {
965d053fb22SAlex Richardson 		char *dst, *end, *src;
966d053fb22SAlex Richardson 		char c;
967c66bbc91SGabor Kovesdan 
968d053fb22SAlex Richardson 		src = str->cdata.str;
969c66bbc91SGabor Kovesdan 		dst = src;
970d053fb22SAlex Richardson 		end = src + str->cdata.len;
971c66bbc91SGabor Kovesdan 
972c66bbc91SGabor Kovesdan 		while (src < end) {
973c66bbc91SGabor Kovesdan 			c = *src;
974c66bbc91SGabor Kovesdan 			if (isalnum(c) || isblank(c)) {
975c66bbc91SGabor Kovesdan 				*dst = c;
976c66bbc91SGabor Kovesdan 				++dst;
977c66bbc91SGabor Kovesdan 				++src;
978c66bbc91SGabor Kovesdan 			} else {
979c66bbc91SGabor Kovesdan 				++src;
980c66bbc91SGabor Kovesdan 				--newlen;
981c66bbc91SGabor Kovesdan 			}
982c66bbc91SGabor Kovesdan 		}
983c66bbc91SGabor Kovesdan 	} else {
984c66bbc91SGabor Kovesdan 		wchar_t *dst, *end, *src;
985c66bbc91SGabor Kovesdan 		wchar_t c;
986c66bbc91SGabor Kovesdan 
987d053fb22SAlex Richardson 		src = str->wdata.str;
988c66bbc91SGabor Kovesdan 		dst = src;
989d053fb22SAlex Richardson 		end = src + str->wdata.len;
990c66bbc91SGabor Kovesdan 
991c66bbc91SGabor Kovesdan 		while (src < end) {
992c66bbc91SGabor Kovesdan 			c = *src;
993c66bbc91SGabor Kovesdan 			if (iswalnum(c) || iswblank(c)) {
994c66bbc91SGabor Kovesdan 				*dst = c;
995c66bbc91SGabor Kovesdan 				++dst;
996c66bbc91SGabor Kovesdan 				++src;
997c66bbc91SGabor Kovesdan 			} else {
998c66bbc91SGabor Kovesdan 				++src;
999c66bbc91SGabor Kovesdan 				--newlen;
1000c66bbc91SGabor Kovesdan 			}
1001c66bbc91SGabor Kovesdan 		}
1002c66bbc91SGabor Kovesdan 	}
1003c66bbc91SGabor Kovesdan 	bws_setlen(str, newlen);
1004c66bbc91SGabor Kovesdan 
1005c66bbc91SGabor Kovesdan 	return (str);
1006c66bbc91SGabor Kovesdan }
1007c66bbc91SGabor Kovesdan 
1008c66bbc91SGabor Kovesdan /*
1009c66bbc91SGabor Kovesdan  * Converts string to lower case(-f).
1010c66bbc91SGabor Kovesdan  */
1011c66bbc91SGabor Kovesdan struct bwstring *
ignore_case(struct bwstring * str)1012c66bbc91SGabor Kovesdan ignore_case(struct bwstring *str)
1013c66bbc91SGabor Kovesdan {
1014e5f71a07SPedro F. Giffuni 
101571ec05a2SCyril Zhang 	if (mb_cur_max == 1) {
1016d053fb22SAlex Richardson 		char *end, *s;
1017c66bbc91SGabor Kovesdan 
1018d053fb22SAlex Richardson 		s = str->cdata.str;
1019d053fb22SAlex Richardson 		end = s + str->cdata.len;
1020c66bbc91SGabor Kovesdan 
1021c66bbc91SGabor Kovesdan 		while (s < end) {
1022c66bbc91SGabor Kovesdan 			*s = toupper(*s);
1023c66bbc91SGabor Kovesdan 			++s;
1024c66bbc91SGabor Kovesdan 		}
1025c66bbc91SGabor Kovesdan 	} else {
1026c66bbc91SGabor Kovesdan 		wchar_t *end, *s;
1027c66bbc91SGabor Kovesdan 
1028d053fb22SAlex Richardson 		s = str->wdata.str;
1029d053fb22SAlex Richardson 		end = s + str->wdata.len;
1030c66bbc91SGabor Kovesdan 
1031c66bbc91SGabor Kovesdan 		while (s < end) {
1032c66bbc91SGabor Kovesdan 			*s = towupper(*s);
1033c66bbc91SGabor Kovesdan 			++s;
1034c66bbc91SGabor Kovesdan 		}
1035c66bbc91SGabor Kovesdan 	}
1036c66bbc91SGabor Kovesdan 	return (str);
1037c66bbc91SGabor Kovesdan }
1038c66bbc91SGabor Kovesdan 
1039c66bbc91SGabor Kovesdan void
bws_disorder_warnx(struct bwstring * s,const char * fn,size_t pos)1040c66bbc91SGabor Kovesdan bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos)
1041c66bbc91SGabor Kovesdan {
1042e5f71a07SPedro F. Giffuni 
104371ec05a2SCyril Zhang 	if (mb_cur_max == 1)
1044d053fb22SAlex Richardson 		warnx("%s:%zu: disorder: %s", fn, pos + 1, s->cdata.str);
1045c66bbc91SGabor Kovesdan 	else
1046d053fb22SAlex Richardson 		warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->wdata.str);
1047c66bbc91SGabor Kovesdan }
1048