xref: /openbsd-src/usr.bin/sort/bwstring.c (revision 479c151d3429b7cfa6228ee428d945620629789d)
1*479c151dSjsg /*	$OpenBSD: bwstring.c,v 1.10 2024/09/20 02:00:46 jsg Exp $	*/
279428148Smillert 
379428148Smillert /*-
479428148Smillert  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
579428148Smillert  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
679428148Smillert  * All rights reserved.
779428148Smillert  *
879428148Smillert  * Redistribution and use in source and binary forms, with or without
979428148Smillert  * modification, are permitted provided that the following conditions
1079428148Smillert  * are met:
1179428148Smillert  * 1. Redistributions of source code must retain the above copyright
1279428148Smillert  *    notice, this list of conditions and the following disclaimer.
1379428148Smillert  * 2. Redistributions in binary form must reproduce the above copyright
1479428148Smillert  *    notice, this list of conditions and the following disclaimer in the
1579428148Smillert  *    documentation and/or other materials provided with the distribution.
1679428148Smillert  *
1779428148Smillert  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1879428148Smillert  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1979428148Smillert  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2079428148Smillert  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2179428148Smillert  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2279428148Smillert  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2379428148Smillert  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2479428148Smillert  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2579428148Smillert  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2679428148Smillert  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2779428148Smillert  * SUCH DAMAGE.
2879428148Smillert  */
2979428148Smillert 
3079428148Smillert #include <ctype.h>
3179428148Smillert #include <errno.h>
3279428148Smillert #include <err.h>
3379428148Smillert #include <langinfo.h>
3479428148Smillert #include <math.h>
3579428148Smillert #include <stdlib.h>
3679428148Smillert #include <string.h>
3779428148Smillert #include <wchar.h>
3879428148Smillert #include <wctype.h>
3979428148Smillert 
4079428148Smillert #include "bwstring.h"
4179428148Smillert #include "sort.h"
4279428148Smillert 
4379428148Smillert static wchar_t **wmonths;
4479428148Smillert static char **cmonths;
4579428148Smillert 
4679428148Smillert /* initialise months */
4779428148Smillert 
4879428148Smillert void
4979428148Smillert initialise_months(void)
5079428148Smillert {
5179428148Smillert 	const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
5279428148Smillert 	    ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10,
5379428148Smillert 	    ABMON_11, ABMON_12 };
5479428148Smillert 	char *tmp;
5579428148Smillert 	size_t len;
5679428148Smillert 
5779428148Smillert 	if (sort_mb_cur_max == 1) {
5879428148Smillert 		if (cmonths == NULL) {
5979428148Smillert 			char *m;
6079428148Smillert 			unsigned int j;
6179428148Smillert 			int i;
6279428148Smillert 
6379428148Smillert 			cmonths = sort_malloc(sizeof(char *) * 12);
6479428148Smillert 			for (i = 0; i < 12; i++) {
6579428148Smillert 				cmonths[i] = NULL;
6679428148Smillert 				tmp = nl_langinfo(item[i]);
6779428148Smillert 				if (debug_sort)
6879428148Smillert 					printf("month[%d]=%s\n", i, tmp);
6976dd1399Smillert 				if (*tmp == '\0')
7079428148Smillert 					continue;
7176dd1399Smillert 				m = sort_strdup(tmp);
72b7326289Smillert 				len = strlen(tmp);
7379428148Smillert 				for (j = 0; j < len; j++)
7479428148Smillert 					m[j] = toupper(m[j]);
7579428148Smillert 				cmonths[i] = m;
7679428148Smillert 			}
7779428148Smillert 		}
7879428148Smillert 	} else {
7979428148Smillert 		if (wmonths == NULL) {
8079428148Smillert 			unsigned int j;
8179428148Smillert 			wchar_t *m;
8279428148Smillert 			int i;
8379428148Smillert 
8479428148Smillert 			wmonths = sort_malloc(sizeof(wchar_t *) * 12);
8579428148Smillert 			for (i = 0; i < 12; i++) {
8679428148Smillert 				wmonths[i] = NULL;
8779428148Smillert 				tmp = nl_langinfo(item[i]);
8879428148Smillert 				if (debug_sort)
8979428148Smillert 					printf("month[%d]=%s\n", i, tmp);
9076dd1399Smillert 				if (*tmp == '\0')
9176dd1399Smillert 					continue;
9279428148Smillert 				len = strlen(tmp);
939c2d5b19Smillert 				m = sort_reallocarray(NULL, len + 1,
949c2d5b19Smillert 				    sizeof(wchar_t));
9576dd1399Smillert 				if (mbstowcs(m, tmp, len) == (size_t)-1) {
9676dd1399Smillert 					sort_free(m);
9779428148Smillert 					continue;
9876dd1399Smillert 				}
9979428148Smillert 				m[len] = L'\0';
10079428148Smillert 				for (j = 0; j < len; j++)
10179428148Smillert 					m[j] = towupper(m[j]);
10279428148Smillert 				wmonths[i] = m;
10379428148Smillert 			}
10479428148Smillert 		}
10579428148Smillert 	}
10679428148Smillert }
10779428148Smillert 
/*
 * Collate two NUL-terminated wide-character strings.
 *
 * wcscoll() is tried first.  If it reports EILSEQ the strings are
 * compared with wcscmp() instead, and should that leave errno set as
 * well, a plain element-by-element comparison is used as last resort.
 */
static int
wide_str_coll(const wchar_t *s1, const wchar_t *s2)
{
	const wchar_t *p, *q;
	int res;

	errno = 0;
	res = wcscoll(s1, s2);
	if (errno != EILSEQ)
		return res;

	errno = 0;
	res = wcscmp(s1, s2);
	if (errno == 0)
		return res;

	/* Manual comparison; a shorter string sorts first. */
	for (p = s1, q = s2; ; ++p, ++q) {
		if (*p == L'\0')
			return (*q == L'\0') ? 0 : -1;
		if (*q == L'\0')
			return 1;
		if (*p != *q)
			return (int)*p - (int)*q;
	}
}
13879428148Smillert 
13979428148Smillert /* counterparts of wcs functions */
14079428148Smillert 
14179428148Smillert void
14279428148Smillert bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix)
14379428148Smillert {
14479428148Smillert 	if (sort_mb_cur_max == 1)
14579428148Smillert 		fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix);
14679428148Smillert 	else
14779428148Smillert 		fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix);
14879428148Smillert }
14979428148Smillert 
15079428148Smillert const void *
15179428148Smillert bwsrawdata(const struct bwstring *bws)
15279428148Smillert {
15379428148Smillert 	return &(bws->data);
15479428148Smillert }
15579428148Smillert 
15679428148Smillert size_t
15779428148Smillert bwsrawlen(const struct bwstring *bws)
15879428148Smillert {
15979428148Smillert 	return (sort_mb_cur_max == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len);
16079428148Smillert }
16179428148Smillert 
16279428148Smillert size_t
16379428148Smillert bws_memsize(const struct bwstring *bws)
16479428148Smillert {
16579428148Smillert 	return (sort_mb_cur_max == 1) ? (bws->len + 2 + sizeof(struct bwstring)) :
16679428148Smillert 	    (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring));
16779428148Smillert }
16879428148Smillert 
16979428148Smillert void
17079428148Smillert bws_setlen(struct bwstring *bws, size_t newlen)
17179428148Smillert {
17279428148Smillert 	if (bws && newlen != bws->len && newlen <= bws->len) {
17379428148Smillert 		bws->len = newlen;
17479428148Smillert 		if (sort_mb_cur_max == 1)
17579428148Smillert 			bws->data.cstr[newlen] = '\0';
17679428148Smillert 		else
17779428148Smillert 			bws->data.wstr[newlen] = L'\0';
17879428148Smillert 	}
17979428148Smillert }
18079428148Smillert 
18179428148Smillert /*
18279428148Smillert  * Allocate a new binary string of specified size
18379428148Smillert  */
18479428148Smillert struct bwstring *
18579428148Smillert bwsalloc(size_t sz)
18679428148Smillert {
18779428148Smillert 	struct bwstring *ret;
18879428148Smillert 
18979428148Smillert 	if (sort_mb_cur_max == 1) {
19079428148Smillert 		ret = sort_malloc(sizeof(struct bwstring) + 1 + sz);
19179428148Smillert 		ret->data.cstr[sz] = '\0';
19279428148Smillert 	} else {
19379428148Smillert 		ret = sort_malloc(sizeof(struct bwstring) +
19479428148Smillert 		    SIZEOF_WCHAR_STRING(sz + 1));
19579428148Smillert 		ret->data.wstr[sz] = L'\0';
19679428148Smillert 	}
19779428148Smillert 	ret->len = sz;
19879428148Smillert 
19979428148Smillert 	return ret;
20079428148Smillert }
20179428148Smillert 
20279428148Smillert /*
20379428148Smillert  * Create a copy of binary string.
20479428148Smillert  * New string size equals the length of the old string.
20579428148Smillert  */
20679428148Smillert struct bwstring *
20779428148Smillert bwsdup(const struct bwstring *s)
20879428148Smillert {
20979428148Smillert 	struct bwstring *ret;
21079428148Smillert 
21179428148Smillert 	if (s == NULL)
21279428148Smillert 		return NULL;
21379428148Smillert 
21479428148Smillert 	ret = bwsalloc(s->len);
21579428148Smillert 
21679428148Smillert 	if (sort_mb_cur_max == 1)
21779428148Smillert 		memcpy(ret->data.cstr, s->data.cstr, s->len);
21879428148Smillert 	else
21979428148Smillert 		memcpy(ret->data.wstr, s->data.wstr,
22079428148Smillert 		    SIZEOF_WCHAR_STRING(s->len));
22179428148Smillert 
22279428148Smillert 	return ret;
22379428148Smillert }
22479428148Smillert 
22579428148Smillert /*
226fc5583eeSmillert  * Create a new binary string from a wide character buffer.
22779428148Smillert  */
22879428148Smillert struct bwstring *
22979428148Smillert bwssbdup(const wchar_t *str, size_t len)
23079428148Smillert {
23179428148Smillert 	if (str == NULL)
23279428148Smillert 		return (len == 0) ? bwsalloc(0) : NULL;
23379428148Smillert 	else {
23479428148Smillert 		struct bwstring *ret;
23579428148Smillert 		size_t i;
23679428148Smillert 
23779428148Smillert 		ret = bwsalloc(len);
23879428148Smillert 
23979428148Smillert 		if (sort_mb_cur_max == 1)
24079428148Smillert 			for (i = 0; i < len; ++i)
24179428148Smillert 				ret->data.cstr[i] = (unsigned char) str[i];
24279428148Smillert 		else
24379428148Smillert 			memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len));
24479428148Smillert 
24579428148Smillert 		return ret;
24679428148Smillert 	}
24779428148Smillert }
24879428148Smillert 
24979428148Smillert /*
25079428148Smillert  * Create a new binary string from a raw binary buffer.
25179428148Smillert  */
25279428148Smillert struct bwstring *
25379428148Smillert bwscsbdup(const unsigned char *str, size_t len)
25479428148Smillert {
25579428148Smillert 	struct bwstring *ret;
25679428148Smillert 
25779428148Smillert 	ret = bwsalloc(len);
25879428148Smillert 
25979428148Smillert 	if (str) {
26079428148Smillert 		if (sort_mb_cur_max == 1)
26179428148Smillert 			memcpy(ret->data.cstr, str, len);
26279428148Smillert 		else {
26379428148Smillert 			mbstate_t mbs;
26479428148Smillert 			const char *s;
26579428148Smillert 			size_t charlen, chars, cptr;
26679428148Smillert 
26779428148Smillert 			chars = 0;
26879428148Smillert 			cptr = 0;
26979428148Smillert 			s = (const char *) str;
27079428148Smillert 
27179428148Smillert 			memset(&mbs, 0, sizeof(mbs));
27279428148Smillert 
27379428148Smillert 			while (cptr < len) {
27479428148Smillert 				size_t n = sort_mb_cur_max;
27579428148Smillert 
27679428148Smillert 				if (n > len - cptr)
27779428148Smillert 					n = len - cptr;
27879428148Smillert 				charlen = mbrlen(s + cptr, n, &mbs);
27979428148Smillert 				switch (charlen) {
28079428148Smillert 				case 0:
28179428148Smillert 					/* FALLTHROUGH */
28279428148Smillert 				case (size_t) -1:
28379428148Smillert 					/* FALLTHROUGH */
28479428148Smillert 				case (size_t) -2:
28579428148Smillert 					ret->data.wstr[chars++] =
28679428148Smillert 					    (unsigned char) s[cptr];
28779428148Smillert 					++cptr;
28879428148Smillert 					break;
28979428148Smillert 				default:
29079428148Smillert 					n = mbrtowc(ret->data.wstr + (chars++),
29179428148Smillert 					    s + cptr, charlen, &mbs);
29279428148Smillert 					if ((n == (size_t)-1) || (n == (size_t)-2))
29379428148Smillert 						/* NOTREACHED */
29479428148Smillert 						err(2, "mbrtowc error");
29579428148Smillert 					cptr += charlen;
296*479c151dSjsg 				}
29779428148Smillert 			}
29879428148Smillert 
29979428148Smillert 			ret->len = chars;
30079428148Smillert 			ret->data.wstr[ret->len] = L'\0';
30179428148Smillert 		}
30279428148Smillert 	}
30379428148Smillert 	return ret;
30479428148Smillert }
30579428148Smillert 
/*
 * Release a binary string.
 * The object is handed to sort_free() as a single allocation.
 */
void
bwsfree(struct bwstring *s)
{
	sort_free(s);
}
31479428148Smillert 
31579428148Smillert /*
31679428148Smillert  * Copy content of src binary string to dst.
31779428148Smillert  * If the capacity of the dst string is not sufficient,
31879428148Smillert  * then the data is truncated.
31979428148Smillert  */
32079428148Smillert size_t
32179428148Smillert bwscpy(struct bwstring *dst, const struct bwstring *src)
32279428148Smillert {
32379428148Smillert 	size_t nums = src->len;
32479428148Smillert 
32579428148Smillert 	if (nums > dst->len)
32679428148Smillert 		nums = dst->len;
32779428148Smillert 	dst->len = nums;
32879428148Smillert 
32979428148Smillert 	if (sort_mb_cur_max == 1) {
33079428148Smillert 		memcpy(dst->data.cstr, src->data.cstr, nums);
33179428148Smillert 		dst->data.cstr[dst->len] = '\0';
33279428148Smillert 	} else {
33379428148Smillert 		memcpy(dst->data.wstr, src->data.wstr,
33479428148Smillert 		    SIZEOF_WCHAR_STRING(nums + 1));
33579428148Smillert 		dst->data.wstr[dst->len] = L'\0';
33679428148Smillert 	}
33779428148Smillert 
33879428148Smillert 	return nums;
33979428148Smillert }
34079428148Smillert 
34179428148Smillert /*
34279428148Smillert  * Copy content of src binary string to dst,
34379428148Smillert  * with specified number of symbols to be copied.
34479428148Smillert  * If the capacity of the dst string is not sufficient,
34579428148Smillert  * then the data is truncated.
34679428148Smillert  */
34779428148Smillert struct bwstring *
34879428148Smillert bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size)
34979428148Smillert {
35079428148Smillert 	size_t nums = src->len;
35179428148Smillert 
35279428148Smillert 	if (nums > dst->len)
35379428148Smillert 		nums = dst->len;
35479428148Smillert 	if (nums > size)
35579428148Smillert 		nums = size;
35679428148Smillert 	dst->len = nums;
35779428148Smillert 
35879428148Smillert 	if (sort_mb_cur_max == 1) {
35979428148Smillert 		memcpy(dst->data.cstr, src->data.cstr, nums);
36079428148Smillert 		dst->data.cstr[dst->len] = '\0';
36179428148Smillert 	} else {
36279428148Smillert 		memcpy(dst->data.wstr, src->data.wstr,
36379428148Smillert 		    SIZEOF_WCHAR_STRING(nums + 1));
36479428148Smillert 		dst->data.wstr[dst->len] = L'\0';
36579428148Smillert 	}
36679428148Smillert 
36779428148Smillert 	return dst;
36879428148Smillert }
36979428148Smillert 
37079428148Smillert /*
37179428148Smillert  * Copy content of src binary string to dst,
37279428148Smillert  * with specified number of symbols to be copied.
37379428148Smillert  * An offset value can be specified, from the start of src string.
37479428148Smillert  * If the capacity of the dst string is not sufficient,
37579428148Smillert  * then the data is truncated.
37679428148Smillert  */
37779428148Smillert struct bwstring *
37879428148Smillert bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset,
37979428148Smillert     size_t size)
38079428148Smillert {
38179428148Smillert 	if (offset >= src->len) {
38279428148Smillert 		dst->data.wstr[0] = 0;
38379428148Smillert 		dst->len = 0;
38479428148Smillert 	} else {
38579428148Smillert 		size_t nums = src->len - offset;
38679428148Smillert 
38779428148Smillert 		if (nums > dst->len)
38879428148Smillert 			nums = dst->len;
38979428148Smillert 		if (nums > size)
39079428148Smillert 			nums = size;
39179428148Smillert 		dst->len = nums;
39279428148Smillert 		if (sort_mb_cur_max == 1) {
39379428148Smillert 			memcpy(dst->data.cstr, src->data.cstr + offset,
39479428148Smillert 			    (nums));
39579428148Smillert 			dst->data.cstr[dst->len] = '\0';
39679428148Smillert 		} else {
39779428148Smillert 			memcpy(dst->data.wstr, src->data.wstr + offset,
39879428148Smillert 			    SIZEOF_WCHAR_STRING(nums));
39979428148Smillert 			dst->data.wstr[dst->len] = L'\0';
40079428148Smillert 		}
40179428148Smillert 	}
40279428148Smillert 	return dst;
40379428148Smillert }
40479428148Smillert 
40579428148Smillert /*
40679428148Smillert  * Write binary string to the file.
40779428148Smillert  * The output is ended either with '\n' (nl == true)
40879428148Smillert  * or '\0' (nl == false).
40979428148Smillert  */
41079428148Smillert size_t
41179428148Smillert bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended)
41279428148Smillert {
41379428148Smillert 	if (sort_mb_cur_max == 1) {
41479428148Smillert 		size_t len = bws->len;
41579428148Smillert 
41679428148Smillert 		if (!zero_ended) {
41779428148Smillert 			bws->data.cstr[len] = '\n';
41879428148Smillert 
41979428148Smillert 			if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
42079428148Smillert 				err(2, NULL);
42179428148Smillert 
42279428148Smillert 			bws->data.cstr[len] = '\0';
42379428148Smillert 		} else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
42479428148Smillert 			err(2, NULL);
42579428148Smillert 
42679428148Smillert 		return len + 1;
42779428148Smillert 
42879428148Smillert 	} else {
42979428148Smillert 		wchar_t eols;
43079428148Smillert 		size_t printed = 0;
43179428148Smillert 
43279428148Smillert 		eols = zero_ended ? btowc('\0') : btowc('\n');
43379428148Smillert 
43479428148Smillert 		while (printed < BWSLEN(bws)) {
43579428148Smillert 			const wchar_t *s = bws->data.wstr + printed;
43679428148Smillert 
43779428148Smillert 			if (*s == L'\0') {
43879428148Smillert 				int nums;
43979428148Smillert 
44079428148Smillert 				nums = fwprintf(f, L"%lc", *s);
44179428148Smillert 
44279428148Smillert 				if (nums != 1)
44379428148Smillert 					err(2, NULL);
44479428148Smillert 				++printed;
44579428148Smillert 			} else {
44679428148Smillert 				int nums;
44779428148Smillert 
44879428148Smillert 				nums = fwprintf(f, L"%ls", s);
44979428148Smillert 
45079428148Smillert 				if (nums < 1)
45179428148Smillert 					err(2, NULL);
45279428148Smillert 				printed += nums;
45379428148Smillert 			}
45479428148Smillert 		}
45579428148Smillert 		fwprintf(f, L"%lc", eols);
45679428148Smillert 		return printed + 1;
45779428148Smillert 	}
45879428148Smillert }
45979428148Smillert 
46079428148Smillert /*
46179428148Smillert  * Allocate and read a binary string from file.
46279428148Smillert  * The strings are nl-ended or zero-ended, depending on the sort setting.
46379428148Smillert  */
46479428148Smillert struct bwstring *
46579428148Smillert bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
46679428148Smillert {
46779428148Smillert 	wint_t eols;
46879428148Smillert 
46979428148Smillert 	eols = zero_ended ? btowc('\0') : btowc('\n');
47079428148Smillert 
47179428148Smillert 	if (!zero_ended && (sort_mb_cur_max > 1)) {
47279428148Smillert 		wchar_t *ret;
47379428148Smillert 
47479428148Smillert 		ret = fgetwln(f, len);
47579428148Smillert 
47679428148Smillert 		if (ret == NULL) {
47779428148Smillert 			if (!feof(f))
47879428148Smillert 				err(2, NULL);
47979428148Smillert 			return NULL;
48079428148Smillert 		}
48179428148Smillert 		if (*len > 0) {
48279428148Smillert 			if (ret[*len - 1] == (wchar_t)eols)
48379428148Smillert 				--(*len);
48479428148Smillert 		}
48579428148Smillert 		return bwssbdup(ret, *len);
48679428148Smillert 
48779428148Smillert 	} else if (!zero_ended && (sort_mb_cur_max == 1)) {
48879428148Smillert 		char *ret;
48979428148Smillert 
49079428148Smillert 		ret = fgetln(f, len);
49179428148Smillert 
49279428148Smillert 		if (ret == NULL) {
49379428148Smillert 			if (!feof(f))
49479428148Smillert 				err(2, NULL);
49579428148Smillert 			return NULL;
49679428148Smillert 		}
49779428148Smillert 		if (*len > 0) {
49879428148Smillert 			if (ret[*len - 1] == '\n')
49979428148Smillert 				--(*len);
50079428148Smillert 		}
50179428148Smillert 		return bwscsbdup((unsigned char *)ret, *len);
50279428148Smillert 
50379428148Smillert 	} else {
50479428148Smillert 		*len = 0;
50579428148Smillert 
50679428148Smillert 		if (feof(f))
50779428148Smillert 			return NULL;
50879428148Smillert 
50979428148Smillert 		if (2 >= rb->fgetwln_z_buffer_size) {
51079428148Smillert 			rb->fgetwln_z_buffer_size += 256;
51179428148Smillert 			rb->fgetwln_z_buffer =
51279428148Smillert 			    sort_reallocarray(rb->fgetwln_z_buffer,
51379428148Smillert 			    rb->fgetwln_z_buffer_size, sizeof(wchar_t));
51479428148Smillert 		}
51579428148Smillert 		rb->fgetwln_z_buffer[*len] = 0;
51679428148Smillert 
517c9bc8d3fSmillert 		if (sort_mb_cur_max == 1) {
51879428148Smillert 			while (!feof(f)) {
51979428148Smillert 				int c;
52079428148Smillert 
52179428148Smillert 				c = fgetc(f);
52279428148Smillert 
52379428148Smillert 				if (c == EOF) {
52479428148Smillert 					if (*len == 0)
52579428148Smillert 						return NULL;
52679428148Smillert 					goto line_read_done;
52779428148Smillert 				}
52879428148Smillert 				if (c == eols)
52979428148Smillert 					goto line_read_done;
53079428148Smillert 
53179428148Smillert 				if (*len + 1 >= rb->fgetwln_z_buffer_size) {
53279428148Smillert 					rb->fgetwln_z_buffer_size += 256;
53379428148Smillert 					rb->fgetwln_z_buffer =
53479428148Smillert 					    sort_reallocarray(rb->fgetwln_z_buffer,
53579428148Smillert 					    rb->fgetwln_z_buffer_size, sizeof(wchar_t));
53679428148Smillert 				}
53779428148Smillert 
53879428148Smillert 				rb->fgetwln_z_buffer[*len] = c;
53979428148Smillert 				rb->fgetwln_z_buffer[++(*len)] = 0;
54079428148Smillert 			}
541c9bc8d3fSmillert 		} else {
54279428148Smillert 			while (!feof(f)) {
54379428148Smillert 				wint_t c = 0;
54479428148Smillert 
54579428148Smillert 				c = fgetwc(f);
54679428148Smillert 
54779428148Smillert 				if (c == WEOF) {
54879428148Smillert 					if (*len == 0)
54979428148Smillert 						return NULL;
55079428148Smillert 					goto line_read_done;
55179428148Smillert 				}
55279428148Smillert 				if (c == eols)
55379428148Smillert 					goto line_read_done;
55479428148Smillert 
55579428148Smillert 				if (*len + 1 >= rb->fgetwln_z_buffer_size) {
55679428148Smillert 					rb->fgetwln_z_buffer_size += 256;
55779428148Smillert 					rb->fgetwln_z_buffer =
55879428148Smillert 					    sort_reallocarray(rb->fgetwln_z_buffer,
55979428148Smillert 					    rb->fgetwln_z_buffer_size, sizeof(wchar_t));
56079428148Smillert 				}
56179428148Smillert 
56279428148Smillert 				rb->fgetwln_z_buffer[*len] = c;
56379428148Smillert 				rb->fgetwln_z_buffer[++(*len)] = 0;
56479428148Smillert 			}
565c9bc8d3fSmillert 		}
56679428148Smillert 
56779428148Smillert line_read_done:
56879428148Smillert 		/* we do not count the last 0 */
56979428148Smillert 		return bwssbdup(rb->fgetwln_z_buffer, *len);
57079428148Smillert 	}
57179428148Smillert }
57279428148Smillert 
57379428148Smillert int
57479428148Smillert bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2,
57579428148Smillert     size_t offset, size_t len)
57679428148Smillert {
57779428148Smillert 	size_t cmp_len, len1, len2;
57879428148Smillert 	int res = 0;
57979428148Smillert 
58079428148Smillert 	len1 = bws1->len;
58179428148Smillert 	len2 = bws2->len;
58279428148Smillert 
58379428148Smillert 	if (len1 <= offset) {
58479428148Smillert 		return (len2 <= offset) ? 0 : -1;
58579428148Smillert 	} else {
58679428148Smillert 		if (len2 <= offset)
58779428148Smillert 			return 1;
58879428148Smillert 		else {
58979428148Smillert 			len1 -= offset;
59079428148Smillert 			len2 -= offset;
59179428148Smillert 
59279428148Smillert 			cmp_len = len1;
59379428148Smillert 
59479428148Smillert 			if (len2 < cmp_len)
59579428148Smillert 				cmp_len = len2;
59679428148Smillert 
59779428148Smillert 			if (len < cmp_len)
59879428148Smillert 				cmp_len = len;
59979428148Smillert 
60079428148Smillert 			if (sort_mb_cur_max == 1) {
60179428148Smillert 				const unsigned char *s1, *s2;
60279428148Smillert 
60379428148Smillert 				s1 = bws1->data.cstr + offset;
60479428148Smillert 				s2 = bws2->data.cstr + offset;
60579428148Smillert 
60679428148Smillert 				res = memcmp(s1, s2, cmp_len);
60779428148Smillert 
60879428148Smillert 			} else {
60979428148Smillert 				const wchar_t *s1, *s2;
61079428148Smillert 
61179428148Smillert 				s1 = bws1->data.wstr + offset;
61279428148Smillert 				s2 = bws2->data.wstr + offset;
61379428148Smillert 
61479428148Smillert 				res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len));
61579428148Smillert 			}
61679428148Smillert 		}
61779428148Smillert 	}
61879428148Smillert 
61979428148Smillert 	if (res == 0) {
62079428148Smillert 		if (len1 < cmp_len && len1 < len2)
62179428148Smillert 			res = -1;
62279428148Smillert 		else if (len2 < cmp_len && len2 < len1)
62379428148Smillert 			res = +1;
62479428148Smillert 	}
62579428148Smillert 
62679428148Smillert 	return res;
62779428148Smillert }
62879428148Smillert 
62979428148Smillert int
63079428148Smillert bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
63179428148Smillert {
63279428148Smillert 	size_t len1, len2, cmp_len;
63379428148Smillert 	int res;
63479428148Smillert 
63579428148Smillert 	len1 = bws1->len;
63679428148Smillert 	len2 = bws2->len;
63779428148Smillert 
63879428148Smillert 	len1 -= offset;
63979428148Smillert 	len2 -= offset;
64079428148Smillert 
64179428148Smillert 	cmp_len = len1;
64279428148Smillert 
64379428148Smillert 	if (len2 < cmp_len)
64479428148Smillert 		cmp_len = len2;
64579428148Smillert 
64679428148Smillert 	res = bwsncmp(bws1, bws2, offset, cmp_len);
64779428148Smillert 
64879428148Smillert 	if (res == 0) {
64979428148Smillert 		if (len1 < len2)
65079428148Smillert 			res = -1;
65179428148Smillert 		else if (len2 < len1)
65279428148Smillert 			res = +1;
65379428148Smillert 	}
65479428148Smillert 
65579428148Smillert 	return res;
65679428148Smillert }
65779428148Smillert 
65879428148Smillert int
65979428148Smillert bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len)
66079428148Smillert {
66179428148Smillert 	wchar_t c1, c2;
66279428148Smillert 	size_t i = 0;
66379428148Smillert 
66479428148Smillert 	for (i = 0; i < len; ++i) {
66579428148Smillert 		c1 = bws_get_iter_value(iter1);
66679428148Smillert 		c2 = bws_get_iter_value(iter2);
66779428148Smillert 		if (c1 != c2)
66879428148Smillert 			return c1 - c2;
66979428148Smillert 		iter1 = bws_iterator_inc(iter1, 1);
67079428148Smillert 		iter2 = bws_iterator_inc(iter2, 1);
67179428148Smillert 	}
67279428148Smillert 
67379428148Smillert 	return 0;
67479428148Smillert }
67579428148Smillert 
67679428148Smillert int
67779428148Smillert bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
67879428148Smillert {
67979428148Smillert 	size_t len1, len2;
68079428148Smillert 
68179428148Smillert 	len1 = bws1->len;
68279428148Smillert 	len2 = bws2->len;
68379428148Smillert 
68479428148Smillert 	if (len1 <= offset)
68579428148Smillert 		return (len2 <= offset) ? 0 : -1;
68652e4174eSschwarze 
68779428148Smillert 	if (len2 <= offset)
68879428148Smillert 		return 1;
68952e4174eSschwarze 
69079428148Smillert 	len1 -= offset;
69179428148Smillert 	len2 -= offset;
69279428148Smillert 
69379428148Smillert 	if (sort_mb_cur_max == 1) {
69479428148Smillert 		const unsigned char *s1, *s2;
69552e4174eSschwarze 		int res;
69679428148Smillert 
69779428148Smillert 		s1 = bws1->data.cstr + offset;
69879428148Smillert 		s2 = bws2->data.cstr + offset;
69979428148Smillert 
70079428148Smillert 		if (len1 > len2) {
70179428148Smillert 			res = memcmp(s1, s2, len2);
70279428148Smillert 			if (!res)
70379428148Smillert 				res = +1;
70479428148Smillert 		} else if (len1 < len2) {
70579428148Smillert 			res = memcmp(s1, s2, len1);
70679428148Smillert 			if (!res)
70779428148Smillert 				res = -1;
70879428148Smillert 		} else
70979428148Smillert 			res = memcmp(s1, s2, len1);
71079428148Smillert 
71179428148Smillert 		return res;
71279428148Smillert 	} else {
71379428148Smillert 		const wchar_t *s1, *s2;
71479428148Smillert 		size_t i, maxlen;
71579428148Smillert 		int res = 0;
71679428148Smillert 
71779428148Smillert 		s1 = bws1->data.wstr + offset;
71879428148Smillert 		s2 = bws2->data.wstr + offset;
71979428148Smillert 
72079428148Smillert 		i = 0;
72179428148Smillert 		maxlen = len1;
72279428148Smillert 
72379428148Smillert 		if (maxlen > len2)
72479428148Smillert 			maxlen = len2;
72579428148Smillert 
72679428148Smillert 		while (i < maxlen) {
72779428148Smillert 
72879428148Smillert 			/* goto next non-zero part: */
729eedbfd81Sschwarze 			while (i < maxlen &&
730eedbfd81Sschwarze 			    s1[i] == L'\0' && s2[i] == L'\0')
73179428148Smillert 				++i;
73279428148Smillert 
73379428148Smillert 			if (i >= maxlen)
73479428148Smillert 				break;
73579428148Smillert 
736eedbfd81Sschwarze 			if (s1[i] == L'\0') {
737eedbfd81Sschwarze 				if (s2[i] == L'\0')
73879428148Smillert 					/* NOTREACHED */
73979428148Smillert 					err(2, "bwscoll error 1");
74079428148Smillert 				else
74179428148Smillert 					return -1;
742eedbfd81Sschwarze 			} else if (s2[i] == L'\0')
74379428148Smillert 				return 1;
74479428148Smillert 
74579428148Smillert 			res = wide_str_coll(s1 + i, s2 + i);
74679428148Smillert 			if (res)
74779428148Smillert 				return res;
74879428148Smillert 
749eedbfd81Sschwarze 			while (i < maxlen && s1[i] != L'\0' && s2[i] != L'\0')
75079428148Smillert 				++i;
75179428148Smillert 
75279428148Smillert 			if (i >= maxlen)
75379428148Smillert 				break;
75479428148Smillert 
755eedbfd81Sschwarze 			if (s1[i] == L'\0') {
756eedbfd81Sschwarze 				if (s2[i] == L'\0') {
75779428148Smillert 					++i;
75879428148Smillert 					continue;
75979428148Smillert 				} else
76079428148Smillert 					return -1;
761eedbfd81Sschwarze 			} else if (s2[i] == L'\0')
76279428148Smillert 				return 1;
76379428148Smillert 			else
76479428148Smillert 				/* NOTREACHED */
76579428148Smillert 				err(2, "bwscoll error 2");
76679428148Smillert 		}
76779428148Smillert 
76879428148Smillert 		if (len1 == len2)
76979428148Smillert 			return 0;
77079428148Smillert 		return len1 < len2 ? -1 : 1;
77179428148Smillert 	}
77279428148Smillert }
77379428148Smillert 
77479428148Smillert /*
77579428148Smillert  * Correction of the system API
77679428148Smillert  */
77779428148Smillert double
77879428148Smillert bwstod(struct bwstring *s0, bool *empty)
77979428148Smillert {
78079428148Smillert 	double ret = 0;
78179428148Smillert 
78279428148Smillert 	if (sort_mb_cur_max == 1) {
78379428148Smillert 		char *ep, *end, *s;
78479428148Smillert 
78579428148Smillert 		s = (char *)s0->data.cstr;
78679428148Smillert 		end = s + s0->len;
78779428148Smillert 		ep = NULL;
78879428148Smillert 
78979428148Smillert 		while (isblank((unsigned char)*s) && s < end)
79079428148Smillert 			++s;
79179428148Smillert 
79279428148Smillert 		if (!isprint((unsigned char)*s)) {
79379428148Smillert 			*empty = true;
79479428148Smillert 			return 0;
79579428148Smillert 		}
79679428148Smillert 
79779428148Smillert 		ret = strtod(s, &ep);
79879428148Smillert 		if (ep == s) {
79979428148Smillert 			*empty = true;
80079428148Smillert 			return 0;
80179428148Smillert 		}
80279428148Smillert 	} else {
80379428148Smillert 		wchar_t *end, *ep, *s;
80479428148Smillert 
80579428148Smillert 		s = s0->data.wstr;
80679428148Smillert 		end = s + s0->len;
80779428148Smillert 		ep = NULL;
80879428148Smillert 
80979428148Smillert 		while (iswblank(*s) && s < end)
81079428148Smillert 			++s;
81179428148Smillert 
81279428148Smillert 		if (!iswprint(*s)) {
81379428148Smillert 			*empty = true;
81479428148Smillert 			return 0;
81579428148Smillert 		}
81679428148Smillert 
81779428148Smillert 		ret = wcstod(s, &ep);
81879428148Smillert 		if (ep == s) {
81979428148Smillert 			*empty = true;
82079428148Smillert 			return 0;
82179428148Smillert 		}
82279428148Smillert 	}
82379428148Smillert 
82479428148Smillert 	*empty = false;
82579428148Smillert 	return ret;
82679428148Smillert }
82779428148Smillert 
82879428148Smillert /*
82979428148Smillert  * A helper function for monthcoll.  If a line matches
83079428148Smillert  * a month name, it returns (number of the month - 1),
83179428148Smillert  * while if there is no match, it just return -1.
83279428148Smillert  */
83379428148Smillert int
83479428148Smillert bws_month_score(const struct bwstring *s0)
83579428148Smillert {
83679428148Smillert 	if (sort_mb_cur_max == 1) {
83779428148Smillert 		const char *end, *s;
83879428148Smillert 		int i;
83979428148Smillert 
84079428148Smillert 		s = (char *)s0->data.cstr;
84179428148Smillert 		end = s + s0->len;
84279428148Smillert 
84379428148Smillert 		while (isblank((unsigned char)*s) && s < end)
84479428148Smillert 			++s;
84579428148Smillert 
84679428148Smillert 		for (i = 11; i >= 0; --i) {
84779428148Smillert 			if (cmonths[i] &&
84879428148Smillert 			    (s == strstr(s, cmonths[i])))
84979428148Smillert 				return i;
85079428148Smillert 		}
85179428148Smillert 	} else {
85279428148Smillert 		const wchar_t *end, *s;
85379428148Smillert 		int i;
85479428148Smillert 
85579428148Smillert 		s = s0->data.wstr;
85679428148Smillert 		end = s + s0->len;
85779428148Smillert 
85879428148Smillert 		while (iswblank(*s) && s < end)
85979428148Smillert 			++s;
86079428148Smillert 
86179428148Smillert 		for (i = 11; i >= 0; --i) {
86279428148Smillert 			if (wmonths[i] && (s == wcsstr(s, wmonths[i])))
86379428148Smillert 				return i;
86479428148Smillert 		}
86579428148Smillert 	}
86679428148Smillert 
86779428148Smillert 	return -1;
86879428148Smillert }
86979428148Smillert 
87079428148Smillert /*
87179428148Smillert  * Rips out leading blanks (-b).
87279428148Smillert  */
87379428148Smillert struct bwstring *
87479428148Smillert ignore_leading_blanks(struct bwstring *str)
87579428148Smillert {
87679428148Smillert 	if (sort_mb_cur_max == 1) {
87779428148Smillert 		unsigned char *dst, *end, *src;
87879428148Smillert 
87979428148Smillert 		src = str->data.cstr;
88079428148Smillert 		dst = src;
88179428148Smillert 		end = src + str->len;
88279428148Smillert 
88379428148Smillert 		while (src < end && isblank(*src))
88479428148Smillert 			++src;
88579428148Smillert 
88679428148Smillert 		if (src != dst) {
88779428148Smillert 			size_t newlen;
88879428148Smillert 
88979428148Smillert 			newlen = BWSLEN(str) - (src - dst);
89079428148Smillert 
89179428148Smillert 			while (src < end) {
89279428148Smillert 				*dst = *src;
89379428148Smillert 				++dst;
89479428148Smillert 				++src;
89579428148Smillert 			}
89679428148Smillert 			bws_setlen(str, newlen);
89779428148Smillert 		}
89879428148Smillert 	} else {
89979428148Smillert 		wchar_t *dst, *end, *src;
90079428148Smillert 
90179428148Smillert 		src = str->data.wstr;
90279428148Smillert 		dst = src;
90379428148Smillert 		end = src + str->len;
90479428148Smillert 
90579428148Smillert 		while (src < end && iswblank(*src))
90679428148Smillert 			++src;
90779428148Smillert 
90879428148Smillert 		if (src != dst) {
90979428148Smillert 
91079428148Smillert 			size_t newlen = BWSLEN(str) - (src - dst);
91179428148Smillert 
91279428148Smillert 			while (src < end) {
91379428148Smillert 				*dst = *src;
91479428148Smillert 				++dst;
91579428148Smillert 				++src;
91679428148Smillert 			}
91779428148Smillert 			bws_setlen(str, newlen);
91879428148Smillert 
91979428148Smillert 		}
92079428148Smillert 	}
92179428148Smillert 	return str;
92279428148Smillert }
92379428148Smillert 
92479428148Smillert /*
92579428148Smillert  * Rips out nonprinting characters (-i).
92679428148Smillert  */
92779428148Smillert struct bwstring *
92879428148Smillert ignore_nonprinting(struct bwstring *str)
92979428148Smillert {
93079428148Smillert 	size_t newlen = str->len;
93179428148Smillert 
93279428148Smillert 	if (sort_mb_cur_max == 1) {
93379428148Smillert 		unsigned char *dst, *end, *src;
93479428148Smillert 		unsigned char c;
93579428148Smillert 
93679428148Smillert 		src = str->data.cstr;
93779428148Smillert 		dst = src;
93879428148Smillert 		end = src + str->len;
93979428148Smillert 
94079428148Smillert 		while (src < end) {
94179428148Smillert 			c = *src;
94279428148Smillert 			if (isprint(c)) {
94379428148Smillert 				*dst = c;
94479428148Smillert 				++dst;
94579428148Smillert 				++src;
94679428148Smillert 			} else {
94779428148Smillert 				++src;
94879428148Smillert 				--newlen;
94979428148Smillert 			}
95079428148Smillert 		}
95179428148Smillert 	} else {
95279428148Smillert 		wchar_t *dst, *end, *src;
95379428148Smillert 		wchar_t c;
95479428148Smillert 
95579428148Smillert 		src = str->data.wstr;
95679428148Smillert 		dst = src;
95779428148Smillert 		end = src + str->len;
95879428148Smillert 
95979428148Smillert 		while (src < end) {
96079428148Smillert 			c = *src;
96179428148Smillert 			if (iswprint(c)) {
96279428148Smillert 				*dst = c;
96379428148Smillert 				++dst;
96479428148Smillert 				++src;
96579428148Smillert 			} else {
96679428148Smillert 				++src;
96779428148Smillert 				--newlen;
96879428148Smillert 			}
96979428148Smillert 		}
97079428148Smillert 	}
97179428148Smillert 	bws_setlen(str, newlen);
97279428148Smillert 
97379428148Smillert 	return str;
97479428148Smillert }
97579428148Smillert 
97679428148Smillert /*
97779428148Smillert  * Rips out any characters that are not alphanumeric characters
97879428148Smillert  * nor blanks (-d).
97979428148Smillert  */
98079428148Smillert struct bwstring *
98179428148Smillert dictionary_order(struct bwstring *str)
98279428148Smillert {
98379428148Smillert 	size_t newlen = str->len;
98479428148Smillert 
98579428148Smillert 	if (sort_mb_cur_max == 1) {
98679428148Smillert 		unsigned char *dst, *end, *src;
98779428148Smillert 		unsigned char c;
98879428148Smillert 
98979428148Smillert 		src = str->data.cstr;
99079428148Smillert 		dst = src;
99179428148Smillert 		end = src + str->len;
99279428148Smillert 
99379428148Smillert 		while (src < end) {
99479428148Smillert 			c = *src;
99579428148Smillert 			if (isalnum(c) || isblank(c)) {
99679428148Smillert 				*dst = c;
99779428148Smillert 				++dst;
99879428148Smillert 				++src;
99979428148Smillert 			} else {
100079428148Smillert 				++src;
100179428148Smillert 				--newlen;
100279428148Smillert 			}
100379428148Smillert 		}
100479428148Smillert 	} else {
100579428148Smillert 		wchar_t *dst, *end, *src;
100679428148Smillert 		wchar_t c;
100779428148Smillert 
100879428148Smillert 		src = str->data.wstr;
100979428148Smillert 		dst = src;
101079428148Smillert 		end = src + str->len;
101179428148Smillert 
101279428148Smillert 		while (src < end) {
101379428148Smillert 			c = *src;
101479428148Smillert 			if (iswalnum(c) || iswblank(c)) {
101579428148Smillert 				*dst = c;
101679428148Smillert 				++dst;
101779428148Smillert 				++src;
101879428148Smillert 			} else {
101979428148Smillert 				++src;
102079428148Smillert 				--newlen;
102179428148Smillert 			}
102279428148Smillert 		}
102379428148Smillert 	}
102479428148Smillert 	bws_setlen(str, newlen);
102579428148Smillert 
102679428148Smillert 	return str;
102779428148Smillert }
102879428148Smillert 
102979428148Smillert /*
103079428148Smillert  * Converts string to lower case(-f).
103179428148Smillert  */
103279428148Smillert struct bwstring *
103379428148Smillert ignore_case(struct bwstring *str)
103479428148Smillert {
103579428148Smillert 	if (sort_mb_cur_max == 1) {
103679428148Smillert 		unsigned char *end, *s;
103779428148Smillert 
103879428148Smillert 		s = str->data.cstr;
103979428148Smillert 		end = s + str->len;
104079428148Smillert 
104179428148Smillert 		while (s < end) {
104279428148Smillert 			*s = toupper(*s);
104379428148Smillert 			++s;
104479428148Smillert 		}
104579428148Smillert 	} else {
104679428148Smillert 		wchar_t *end, *s;
104779428148Smillert 
104879428148Smillert 		s = str->data.wstr;
104979428148Smillert 		end = s + str->len;
105079428148Smillert 
105179428148Smillert 		while (s < end) {
105279428148Smillert 			*s = towupper(*s);
105379428148Smillert 			++s;
105479428148Smillert 		}
105579428148Smillert 	}
105679428148Smillert 	return str;
105779428148Smillert }
105879428148Smillert 
105979428148Smillert void
106079428148Smillert bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos)
106179428148Smillert {
106279428148Smillert 	if (sort_mb_cur_max == 1)
106379428148Smillert 		warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr);
106479428148Smillert 	else
106579428148Smillert 		warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr);
106679428148Smillert }
1067