xref: /openbsd-src/usr.bin/sort/coll.c (revision 479c151d3429b7cfa6228ee428d945620629789d)
1*479c151dSjsg /*	$OpenBSD: coll.c,v 1.13 2024/09/20 02:00:46 jsg Exp $	*/
279428148Smillert 
379428148Smillert /*-
479428148Smillert  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
579428148Smillert  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
679428148Smillert  * All rights reserved.
779428148Smillert  *
879428148Smillert  * Redistribution and use in source and binary forms, with or without
979428148Smillert  * modification, are permitted provided that the following conditions
1079428148Smillert  * are met:
1179428148Smillert  * 1. Redistributions of source code must retain the above copyright
1279428148Smillert  *    notice, this list of conditions and the following disclaimer.
1379428148Smillert  * 2. Redistributions in binary form must reproduce the above copyright
1479428148Smillert  *    notice, this list of conditions and the following disclaimer in the
1579428148Smillert  *    documentation and/or other materials provided with the distribution.
1679428148Smillert  *
1779428148Smillert  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1879428148Smillert  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1979428148Smillert  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2079428148Smillert  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2179428148Smillert  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2279428148Smillert  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2379428148Smillert  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2479428148Smillert  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2579428148Smillert  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2679428148Smillert  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2779428148Smillert  * SUCH DAMAGE.
2879428148Smillert  */
2979428148Smillert 
3079428148Smillert #include <sys/types.h>
3179428148Smillert 
3279428148Smillert #include <errno.h>
3379428148Smillert #include <err.h>
3479428148Smillert #include <langinfo.h>
3579428148Smillert #include <limits.h>
3679428148Smillert #include <math.h>
3779428148Smillert #include <md5.h>
3879428148Smillert #include <stdlib.h>
3979428148Smillert #include <string.h>
4079428148Smillert #include <wchar.h>
4179428148Smillert #include <wctype.h>
4279428148Smillert 
4379428148Smillert #include "coll.h"
4479428148Smillert #include "vsort.h"
4579428148Smillert 
4679428148Smillert struct key_specs *keys;
4779428148Smillert size_t keys_num = 0;
4879428148Smillert 
4979428148Smillert static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
5079428148Smillert static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
5179428148Smillert static int monthcoll(struct key_value*, struct key_value *, size_t offset);
5279428148Smillert static int numcoll(struct key_value*, struct key_value *, size_t offset);
5379428148Smillert static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
5479428148Smillert static int randomcoll(struct key_value*, struct key_value *, size_t offset);
5579428148Smillert static int versioncoll(struct key_value*, struct key_value *, size_t offset);
5679428148Smillert 
5779428148Smillert /*
5879428148Smillert  * Allocate keys array
5979428148Smillert  */
6079428148Smillert struct keys_array *
6179428148Smillert keys_array_alloc(void)
6279428148Smillert {
6379428148Smillert 	struct keys_array *ka;
6479428148Smillert 	size_t sz;
6579428148Smillert 
6679428148Smillert 	sz = keys_array_size();
678fd2d339Smillert 	ka = sort_calloc(1, sz);
6879428148Smillert 
6979428148Smillert 	return ka;
7079428148Smillert }
7179428148Smillert 
7279428148Smillert /*
7379428148Smillert  * Calculate whether we need key hint space
7479428148Smillert  */
7579428148Smillert static size_t
7679428148Smillert key_hint_size(void)
7779428148Smillert {
7879428148Smillert 	return need_hint ? sizeof(struct key_hint) : 0;
7979428148Smillert }
8079428148Smillert 
8179428148Smillert /*
8279428148Smillert  * Calculate keys array size
8379428148Smillert  */
8479428148Smillert size_t
8579428148Smillert keys_array_size(void)
8679428148Smillert {
8779428148Smillert 	return keys_num * (sizeof(struct key_value) + key_hint_size());
8879428148Smillert }
8979428148Smillert 
9079428148Smillert /*
9179428148Smillert  * Clean data of keys array
9279428148Smillert  */
9379428148Smillert void
9479428148Smillert clean_keys_array(const struct bwstring *s, struct keys_array *ka)
9579428148Smillert {
9679428148Smillert 	if (ka) {
9779428148Smillert 		size_t i;
9879428148Smillert 
9979428148Smillert 		for (i = 0; i < keys_num; ++i)
10079428148Smillert 			if (ka->key[i].k && ka->key[i].k != s)
10179428148Smillert 				bwsfree(ka->key[i].k);
10279428148Smillert 		memset(ka, 0, keys_array_size());
10379428148Smillert 	}
10479428148Smillert }
10579428148Smillert 
10679428148Smillert /*
10779428148Smillert  * Set value of a key in the keys set
10879428148Smillert  */
10979428148Smillert void
11079428148Smillert set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
11179428148Smillert {
11279428148Smillert 	if (ka && keys_num > ind) {
11379428148Smillert 		struct key_value *kv;
11479428148Smillert 
11579428148Smillert 		kv = &(ka->key[ind]);
11679428148Smillert 
1178573467eStobias 		if (kv->k != s)
11879428148Smillert 			bwsfree(kv->k);
11979428148Smillert 		kv->k = s;
12079428148Smillert 	}
12179428148Smillert }
12279428148Smillert 
12379428148Smillert /*
12479428148Smillert  * Initialize a sort list item
12579428148Smillert  */
12679428148Smillert struct sort_list_item *
12779428148Smillert sort_list_item_alloc(void)
12879428148Smillert {
12979428148Smillert 	struct sort_list_item *si;
13079428148Smillert 	size_t sz;
13179428148Smillert 
13279428148Smillert 	sz = sizeof(struct sort_list_item) + keys_array_size();
1338fd2d339Smillert 	si = sort_calloc(1, sz);
13479428148Smillert 
13579428148Smillert 	return si;
13679428148Smillert }
13779428148Smillert 
13879428148Smillert size_t
13979428148Smillert sort_list_item_size(struct sort_list_item *si)
14079428148Smillert {
14179428148Smillert 	size_t i, ret = 0;
14279428148Smillert 
14379428148Smillert 	if (si) {
14479428148Smillert 		ret = sizeof(struct sort_list_item) + keys_array_size();
14579428148Smillert 		if (si->str)
14679428148Smillert 			ret += bws_memsize(si->str);
14779428148Smillert 		for (i = 0; i < keys_num; ++i) {
14879428148Smillert 			struct key_value *kv;
14979428148Smillert 
15079428148Smillert 			kv = &(si->ka.key[i]);
15179428148Smillert 
15279428148Smillert 			if (kv->k != si->str)
15379428148Smillert 				ret += bws_memsize(kv->k);
15479428148Smillert 		}
15579428148Smillert 	}
15679428148Smillert 	return ret;
15779428148Smillert }
15879428148Smillert 
15979428148Smillert /*
16079428148Smillert  * Calculate key for a sort list item
16179428148Smillert  */
16279428148Smillert static void
16379428148Smillert sort_list_item_make_key(struct sort_list_item *si)
16479428148Smillert {
16579428148Smillert 	preproc(si->str, &(si->ka));
16679428148Smillert }
16779428148Smillert 
16879428148Smillert /*
16979428148Smillert  * Set value of a sort list item.
17079428148Smillert  * Return combined string and keys memory size.
17179428148Smillert  */
17279428148Smillert void
17379428148Smillert sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
17479428148Smillert {
17579428148Smillert 	if (si) {
17679428148Smillert 		clean_keys_array(si->str, &(si->ka));
17779428148Smillert 		if (si->str) {
17879428148Smillert 			if (si->str == str) {
17979428148Smillert 				/* we are trying to reset the same string */
18079428148Smillert 				return;
18179428148Smillert 			} else {
18279428148Smillert 				bwsfree(si->str);
18379428148Smillert 				si->str = NULL;
18479428148Smillert 			}
18579428148Smillert 		}
18679428148Smillert 		si->str = str;
18779428148Smillert 		sort_list_item_make_key(si);
18879428148Smillert 	}
18979428148Smillert }
19079428148Smillert 
19179428148Smillert /*
19279428148Smillert  * De-allocate a sort list item object memory
19379428148Smillert  */
19479428148Smillert void
19579428148Smillert sort_list_item_clean(struct sort_list_item *si)
19679428148Smillert {
19779428148Smillert 	if (si) {
19879428148Smillert 		clean_keys_array(si->str, &(si->ka));
19979428148Smillert 		if (si->str) {
20079428148Smillert 			bwsfree(si->str);
20179428148Smillert 			si->str = NULL;
20279428148Smillert 		}
20379428148Smillert 	}
20479428148Smillert }
20579428148Smillert 
20679428148Smillert /*
20779428148Smillert  * Skip columns according to specs
20879428148Smillert  */
20979428148Smillert static size_t
21079428148Smillert skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
21179428148Smillert     bool skip_blanks, bool *empty_key)
21279428148Smillert {
21379428148Smillert 	if (cols < 1)
21479428148Smillert 		return BWSLEN(s) + 1;
21579428148Smillert 
21679428148Smillert 	if (skip_blanks)
21779428148Smillert 		while (start < BWSLEN(s) && iswblank(BWS_GET(s, start)))
21879428148Smillert 			++start;
21979428148Smillert 
22079428148Smillert 	while (start < BWSLEN(s) && cols > 1) {
22179428148Smillert 		--cols;
22279428148Smillert 		++start;
22379428148Smillert 	}
22479428148Smillert 
22579428148Smillert 	if (start >= BWSLEN(s))
22679428148Smillert 		*empty_key = true;
22779428148Smillert 
22879428148Smillert 	return start;
22979428148Smillert }
23079428148Smillert 
23179428148Smillert /*
23279428148Smillert  * Skip fields according to specs
23379428148Smillert  */
23479428148Smillert static size_t
23579428148Smillert skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
23679428148Smillert {
23779428148Smillert 	if (fields < 2) {
23879428148Smillert 		if (BWSLEN(s) == 0)
23979428148Smillert 			*empty_field = true;
24079428148Smillert 		return 0;
24179428148Smillert 	} else if (!(sort_opts_vals.tflag)) {
24279428148Smillert 		size_t cpos = 0;
24379428148Smillert 		bool pb = true;
24479428148Smillert 
24579428148Smillert 		while (cpos < BWSLEN(s)) {
24679428148Smillert 			bool isblank;
24779428148Smillert 
24879428148Smillert 			isblank = iswblank(BWS_GET(s, cpos));
24979428148Smillert 
25079428148Smillert 			if (isblank && !pb) {
25179428148Smillert 				--fields;
25279428148Smillert 				if (fields <= 1)
25379428148Smillert 					return cpos;
25479428148Smillert 			}
25579428148Smillert 			pb = isblank;
25679428148Smillert 			++cpos;
25779428148Smillert 		}
25879428148Smillert 		if (fields > 1)
25979428148Smillert 			*empty_field = true;
26079428148Smillert 		return cpos;
26179428148Smillert 	} else {
26279428148Smillert 		size_t cpos = 0;
26379428148Smillert 
26479428148Smillert 		while (cpos < BWSLEN(s)) {
26579428148Smillert 			if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) {
26679428148Smillert 				--fields;
26779428148Smillert 				if (fields <= 1)
26879428148Smillert 					return cpos + 1;
26979428148Smillert 			}
27079428148Smillert 			++cpos;
27179428148Smillert 		}
27279428148Smillert 		if (fields > 1)
27379428148Smillert 			*empty_field = true;
27479428148Smillert 		return cpos;
27579428148Smillert 	}
27679428148Smillert }
27779428148Smillert 
27879428148Smillert /*
27979428148Smillert  * Find fields start
28079428148Smillert  */
28179428148Smillert static void
28279428148Smillert find_field_start(const struct bwstring *s, struct key_specs *ks,
28379428148Smillert     size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
28479428148Smillert {
28579428148Smillert 	*field_start = skip_fields_to_start(s, ks->f1, empty_field);
28679428148Smillert 	if (!*empty_field)
28779428148Smillert 		*key_start = skip_cols_to_start(s, ks->c1, *field_start,
28879428148Smillert 		    ks->pos1b, empty_key);
28979428148Smillert 	else
29079428148Smillert 		*empty_key = true;
29179428148Smillert }
29279428148Smillert 
29379428148Smillert /*
29479428148Smillert  * Find end key position
29579428148Smillert  */
29679428148Smillert static size_t
29779428148Smillert find_field_end(const struct bwstring *s, struct key_specs *ks)
29879428148Smillert {
29979428148Smillert 	size_t f2, next_field_start, pos_end;
30079428148Smillert 	bool empty_field, empty_key;
30179428148Smillert 
30279428148Smillert 	empty_field = false;
30379428148Smillert 	empty_key = false;
30479428148Smillert 	f2 = ks->f2;
30579428148Smillert 
30679428148Smillert 	if (f2 == 0)
30779428148Smillert 		return BWSLEN(s) + 1;
30879428148Smillert 	else {
30979428148Smillert 		if (ks->c2 == 0) {
31079428148Smillert 			next_field_start = skip_fields_to_start(s, f2 + 1,
31179428148Smillert 			    &empty_field);
31279428148Smillert 			if ((next_field_start > 0) && sort_opts_vals.tflag &&
31379428148Smillert 			    ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
31479428148Smillert 			    next_field_start - 1)))
31579428148Smillert 				--next_field_start;
31679428148Smillert 		} else
31779428148Smillert 			next_field_start = skip_fields_to_start(s, f2,
31879428148Smillert 			    &empty_field);
31979428148Smillert 	}
32079428148Smillert 
32179428148Smillert 	if (empty_field || (next_field_start >= BWSLEN(s)))
32279428148Smillert 		return BWSLEN(s) + 1;
32379428148Smillert 
32479428148Smillert 	if (ks->c2) {
32579428148Smillert 		pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
32679428148Smillert 		    ks->pos2b, &empty_key);
32779428148Smillert 		if (pos_end < BWSLEN(s))
32879428148Smillert 			++pos_end;
32979428148Smillert 	} else
33079428148Smillert 		pos_end = next_field_start;
33179428148Smillert 
33279428148Smillert 	return pos_end;
33379428148Smillert }
33479428148Smillert 
33579428148Smillert /*
33679428148Smillert  * Cut a field according to the key specs
33779428148Smillert  */
33879428148Smillert static struct bwstring *
33979428148Smillert cut_field(const struct bwstring *s, struct key_specs *ks)
34079428148Smillert {
34179428148Smillert 	struct bwstring *ret = NULL;
34279428148Smillert 
34379428148Smillert 	if (s && ks) {
34479428148Smillert 		size_t field_start, key_end, key_start, sz;
34579428148Smillert 		bool empty_field, empty_key;
34679428148Smillert 
34779428148Smillert 		field_start = 0;
34879428148Smillert 		key_start = 0;
34979428148Smillert 		empty_field = false;
35079428148Smillert 		empty_key = false;
35179428148Smillert 
35279428148Smillert 		find_field_start(s, ks, &field_start, &key_start,
35379428148Smillert 		    &empty_field, &empty_key);
35479428148Smillert 
35579428148Smillert 		if (empty_key)
35679428148Smillert 			sz = 0;
35779428148Smillert 		else {
35879428148Smillert 			key_end = find_field_end(s, ks);
35979428148Smillert 			sz = (key_end < key_start) ? 0 : (key_end - key_start);
36079428148Smillert 		}
36179428148Smillert 
36279428148Smillert 		ret = bwsalloc(sz);
36379428148Smillert 		if (sz)
36479428148Smillert 			bwsnocpy(ret, s, key_start, sz);
36579428148Smillert 	} else
36679428148Smillert 		ret = bwsalloc(0);
36779428148Smillert 
36879428148Smillert 	return ret;
36979428148Smillert }
37079428148Smillert 
37179428148Smillert /*
37279428148Smillert  * Preprocesses a line applying the necessary transformations
37379428148Smillert  * specified by command line options and returns the preprocessed
37479428148Smillert  * string, which can be used to compare.
37579428148Smillert  */
37679428148Smillert int
37779428148Smillert preproc(struct bwstring *s, struct keys_array *ka)
37879428148Smillert {
37979428148Smillert 	if (sort_opts_vals.kflag) {
38079428148Smillert 		size_t i;
38179428148Smillert 		for (i = 0; i < keys_num; i++) {
38279428148Smillert 			struct bwstring *key;
38379428148Smillert 			struct key_specs *kspecs;
38479428148Smillert 			struct sort_mods *sm;
38579428148Smillert 
38679428148Smillert 			kspecs = &(keys[i]);
38779428148Smillert 			key = cut_field(s, kspecs);
38879428148Smillert 
38979428148Smillert 			sm = &(kspecs->sm);
39079428148Smillert 			if (sm->dflag)
39179428148Smillert 				key = dictionary_order(key);
39279428148Smillert 			else if (sm->iflag)
39379428148Smillert 				key = ignore_nonprinting(key);
39479428148Smillert 			if (sm->fflag || sm->Mflag)
39579428148Smillert 				key = ignore_case(key);
39679428148Smillert 
39779428148Smillert 			set_key_on_keys_array(ka, key, i);
39879428148Smillert 		}
39979428148Smillert 	} else {
40079428148Smillert 		struct bwstring *ret = NULL;
40179428148Smillert 		struct sort_mods *sm = default_sort_mods;
40279428148Smillert 
403352abeacSmillert #ifdef GNUSORT_COMPATIBILITY
40479428148Smillert 		if (sm->bflag) {
40579428148Smillert 			if (ret == NULL)
40679428148Smillert 				ret = bwsdup(s);
40779428148Smillert 			ret = ignore_leading_blanks(ret);
40879428148Smillert 		}
409352abeacSmillert #endif
41079428148Smillert 		if (sm->dflag) {
41179428148Smillert 			if (ret == NULL)
41279428148Smillert 				ret = bwsdup(s);
41379428148Smillert 			ret = dictionary_order(ret);
41479428148Smillert 		} else if (sm->iflag) {
41579428148Smillert 			if (ret == NULL)
41679428148Smillert 				ret = bwsdup(s);
41779428148Smillert 			ret = ignore_nonprinting(ret);
41879428148Smillert 		}
41979428148Smillert 		if (sm->fflag || sm->Mflag) {
42079428148Smillert 			if (ret == NULL)
42179428148Smillert 				ret = bwsdup(s);
42279428148Smillert 			ret = ignore_case(ret);
42379428148Smillert 		}
42479428148Smillert 		if (ret == NULL)
42579428148Smillert 			set_key_on_keys_array(ka, s, 0);
42679428148Smillert 		else
42779428148Smillert 			set_key_on_keys_array(ka, ret, 0);
42879428148Smillert 	}
42979428148Smillert 
43079428148Smillert 	return 0;
43179428148Smillert }
43279428148Smillert 
43379428148Smillert cmpcoll_t
43479428148Smillert get_sort_func(struct sort_mods *sm)
43579428148Smillert {
43679428148Smillert 	if (sm->nflag)
43779428148Smillert 		return numcoll;
43879428148Smillert 	else if (sm->hflag)
43979428148Smillert 		return hnumcoll;
44079428148Smillert 	else if (sm->gflag)
44179428148Smillert 		return gnumcoll;
44279428148Smillert 	else if (sm->Mflag)
44379428148Smillert 		return monthcoll;
44479428148Smillert 	else if (sm->Rflag)
44579428148Smillert 		return randomcoll;
44679428148Smillert 	else if (sm->Vflag)
44779428148Smillert 		return versioncoll;
44879428148Smillert 	else
44979428148Smillert 		return wstrcoll;
45079428148Smillert }
45179428148Smillert 
45279428148Smillert /*
45379428148Smillert  * Compares the given strings.  Returns a positive number if
45479428148Smillert  * the first precedes the second, a negative number if the second is
45579428148Smillert  * the preceding one, and zero if they are equal.  This function calls
45679428148Smillert  * the underlying collate functions, which done the actual comparison.
45779428148Smillert  */
45879428148Smillert int
45979428148Smillert key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
46079428148Smillert {
46179428148Smillert 	struct sort_mods *sm;
46279428148Smillert 	int res = 0;
46379428148Smillert 	size_t i;
46479428148Smillert 
46579428148Smillert 	for (i = 0; i < keys_num; ++i) {
46679428148Smillert 		sm = &(keys[i].sm);
46779428148Smillert 
46879428148Smillert 		if (sm->rflag)
46979428148Smillert 			res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
47079428148Smillert 		else
47179428148Smillert 			res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
47279428148Smillert 
47379428148Smillert 		if (res)
47479428148Smillert 			break;
47579428148Smillert 
47679428148Smillert 		/* offset applies to only the first key */
47779428148Smillert 		offset = 0;
47879428148Smillert 	}
47979428148Smillert 	return res;
48079428148Smillert }
48179428148Smillert 
48279428148Smillert /*
48379428148Smillert  * Compare two strings.
48479428148Smillert  * Plain symbol-by-symbol comparison.
48579428148Smillert  */
48679428148Smillert int
48779428148Smillert top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
48879428148Smillert {
48979428148Smillert 	if (default_sort_mods->rflag) {
49079428148Smillert 		const struct bwstring *tmp;
49179428148Smillert 
49279428148Smillert 		tmp = s1;
49379428148Smillert 		s1 = s2;
49479428148Smillert 		s2 = tmp;
49579428148Smillert 	}
49679428148Smillert 
49779428148Smillert 	return bwscoll(s1, s2, 0);
49879428148Smillert }
49979428148Smillert 
50079428148Smillert /*
50179428148Smillert  * Compare a string and a sort list item, according to the sort specs.
50279428148Smillert  */
50379428148Smillert int
50479428148Smillert str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
50579428148Smillert {
50679428148Smillert 	struct keys_array *ka1;
50779428148Smillert 	int ret = 0;
50879428148Smillert 
50979428148Smillert 	ka1 = keys_array_alloc();
51079428148Smillert 
51179428148Smillert 	preproc(str1, ka1);
51279428148Smillert 
51379428148Smillert 	sort_list_item_make_key(*ss2);
51479428148Smillert 
51579428148Smillert 	if (debug_sort) {
51679428148Smillert 		bwsprintf(stdout, str1, "; s1=<", ">");
51779428148Smillert 		bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
51879428148Smillert 	}
51979428148Smillert 
52079428148Smillert 	ret = key_coll(ka1, &((*ss2)->ka), 0);
52179428148Smillert 
52279428148Smillert 	if (debug_sort)
52379428148Smillert 		printf("; cmp1=%d", ret);
52479428148Smillert 
52579428148Smillert 	clean_keys_array(str1, ka1);
52679428148Smillert 	sort_free(ka1);
52779428148Smillert 
52879428148Smillert 	if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
52979428148Smillert 		ret = top_level_str_coll(str1, ((*ss2)->str));
53079428148Smillert 		if (debug_sort)
53179428148Smillert 			printf("; cmp2=%d", ret);
53279428148Smillert 	}
53379428148Smillert 
53479428148Smillert 	if (debug_sort)
53579428148Smillert 		putchar('\n');
53679428148Smillert 
53779428148Smillert 	return ret;
53879428148Smillert }
53979428148Smillert 
54079428148Smillert /*
54179428148Smillert  * Compare two sort list items, according to the sort specs.
54279428148Smillert  */
54379428148Smillert int
54479428148Smillert list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
54579428148Smillert     size_t offset)
54679428148Smillert {
54779428148Smillert 	int ret;
54879428148Smillert 
54979428148Smillert 	ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
55079428148Smillert 
55179428148Smillert 	if (debug_sort) {
55279428148Smillert 		if (offset)
553f6f04aceSmmcc 			printf("; offset=%zu", offset);
55479428148Smillert 		bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
55579428148Smillert 		bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
55679428148Smillert 		printf("; cmp1=%d\n", ret);
55779428148Smillert 	}
55879428148Smillert 
55979428148Smillert 	if (ret)
56079428148Smillert 		return ret;
56179428148Smillert 
56279428148Smillert 	if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
56379428148Smillert 		ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
56479428148Smillert 		if (debug_sort)
56579428148Smillert 			printf("; cmp2=%d\n", ret);
56679428148Smillert 	}
56779428148Smillert 
56879428148Smillert 	return ret;
56979428148Smillert }
57079428148Smillert 
57179428148Smillert /*
57279428148Smillert  * Compare two sort list items, according to the sort specs.
57379428148Smillert  */
57479428148Smillert int
57579428148Smillert list_coll(const void *ss1, const void *ss2)
57679428148Smillert {
57779428148Smillert 	return list_coll_offset((struct sort_list_item **)ss1,
57879428148Smillert 	    (struct sort_list_item **)ss2, 0);
57979428148Smillert }
58079428148Smillert 
58179428148Smillert #define LSCDEF(N)											\
58279428148Smillert static int												\
58379428148Smillert list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)					\
58479428148Smillert {													\
58579428148Smillert 													\
58679428148Smillert 	return list_coll_offset(ss1, ss2, N);								\
58779428148Smillert }
58879428148Smillert 
58979428148Smillert LSCDEF(0)
59079428148Smillert LSCDEF(1)
59179428148Smillert LSCDEF(2)
59279428148Smillert LSCDEF(3)
59379428148Smillert LSCDEF(4)
59479428148Smillert LSCDEF(5)
59579428148Smillert LSCDEF(6)
59679428148Smillert LSCDEF(7)
59779428148Smillert LSCDEF(8)
59879428148Smillert LSCDEF(9)
59979428148Smillert LSCDEF(10)
60079428148Smillert LSCDEF(11)
60179428148Smillert LSCDEF(12)
60279428148Smillert LSCDEF(13)
60379428148Smillert LSCDEF(14)
60479428148Smillert LSCDEF(15)
60579428148Smillert LSCDEF(16)
60679428148Smillert LSCDEF(17)
60779428148Smillert LSCDEF(18)
60879428148Smillert LSCDEF(19)
60979428148Smillert LSCDEF(20)
61079428148Smillert 
61179428148Smillert listcoll_t
61279428148Smillert get_list_call_func(size_t offset)
61379428148Smillert {
61479428148Smillert 	static const listcoll_t lsarray[] = { list_coll_0, list_coll_1,
61579428148Smillert 	    list_coll_2, list_coll_3, list_coll_4, list_coll_5,
61679428148Smillert 	    list_coll_6, list_coll_7, list_coll_8, list_coll_9,
61779428148Smillert 	    list_coll_10, list_coll_11, list_coll_12, list_coll_13,
61879428148Smillert 	    list_coll_14, list_coll_15, list_coll_16, list_coll_17,
61979428148Smillert 	    list_coll_18, list_coll_19, list_coll_20 };
62079428148Smillert 
62179428148Smillert 	if (offset <= 20)
62279428148Smillert 		return lsarray[offset];
62379428148Smillert 
62479428148Smillert 	return list_coll_0;
62579428148Smillert }
62679428148Smillert 
62779428148Smillert /*
62879428148Smillert  * Compare two sort list items, only by their original string.
62979428148Smillert  */
63079428148Smillert int
63179428148Smillert list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
63279428148Smillert {
63379428148Smillert 	return top_level_str_coll(((*ss1)->str), ((*ss2)->str));
63479428148Smillert }
63579428148Smillert 
63679428148Smillert /*
63779428148Smillert  * Maximum size of a number in the string (before or after decimal point)
63879428148Smillert  */
63979428148Smillert #define MAX_NUM_SIZE (128)
64079428148Smillert 
64179428148Smillert /*
64279428148Smillert  * Set suffix value
64379428148Smillert  */
64479428148Smillert static void
64579428148Smillert setsuffix(wchar_t c, unsigned char *si)
64679428148Smillert {
64779428148Smillert 	switch (c){
64879428148Smillert 	case L'k':
64979428148Smillert 	case L'K':
65079428148Smillert 		*si = 1;
65179428148Smillert 		break;
65279428148Smillert 	case L'M':
65379428148Smillert 		*si = 2;
65479428148Smillert 		break;
65579428148Smillert 	case L'G':
65679428148Smillert 		*si = 3;
65779428148Smillert 		break;
65879428148Smillert 	case L'T':
65979428148Smillert 		*si = 4;
66079428148Smillert 		break;
66179428148Smillert 	case L'P':
66279428148Smillert 		*si = 5;
66379428148Smillert 		break;
66479428148Smillert 	case L'E':
66579428148Smillert 		*si = 6;
66679428148Smillert 		break;
66779428148Smillert 	case L'Z':
66879428148Smillert 		*si = 7;
66979428148Smillert 		break;
67079428148Smillert 	case L'Y':
67179428148Smillert 		*si = 8;
67279428148Smillert 		break;
67379428148Smillert 	default:
67479428148Smillert 		*si = 0;
675*479c151dSjsg 	}
67679428148Smillert }
67779428148Smillert 
67879428148Smillert /*
67979428148Smillert  * Read string s and parse the string into a fixed-decimal-point number.
68079428148Smillert  * sign equals -1 if the number is negative (explicit plus is not allowed,
68179428148Smillert  * according to GNU sort's "info sort".
68279428148Smillert  * The number part before decimal point is in the smain, after the decimal
68379428148Smillert  * point is in sfrac, tail is the pointer to the remainder of the string.
68479428148Smillert  */
68579428148Smillert static int
68679428148Smillert read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
68779428148Smillert {
68879428148Smillert 	bwstring_iterator s;
68979428148Smillert 
69079428148Smillert 	s = bws_begin(s0);
69179428148Smillert 
69279428148Smillert 	/* always end the fraction with zero, even if we have no fraction */
69379428148Smillert 	sfrac[0] = 0;
69479428148Smillert 
69579428148Smillert 	while (iswblank(bws_get_iter_value(s)))
69679428148Smillert 		s = bws_iterator_inc(s, 1);
69779428148Smillert 
698474067aaSschwarze 	if (bws_get_iter_value(s) == L'-') {
69979428148Smillert 		*sign = -1;
70079428148Smillert 		s = bws_iterator_inc(s, 1);
70179428148Smillert 	}
70279428148Smillert 
70379428148Smillert 	// This is '0', not '\0', do not change this
70479428148Smillert 	while (iswdigit(bws_get_iter_value(s)) &&
70579428148Smillert 	    (bws_get_iter_value(s) == L'0'))
70679428148Smillert 		s = bws_iterator_inc(s, 1);
70779428148Smillert 
70879428148Smillert 	while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
70979428148Smillert 		if (iswdigit(bws_get_iter_value(s))) {
71079428148Smillert 			smain[*main_len] = bws_get_iter_value(s);
71179428148Smillert 			s = bws_iterator_inc(s, 1);
71279428148Smillert 			*main_len += 1;
713474067aaSschwarze 		} else
71479428148Smillert 			break;
71579428148Smillert 	}
71679428148Smillert 
71779428148Smillert 	smain[*main_len] = 0;
71879428148Smillert 
719474067aaSschwarze 	if (bws_get_iter_value(s) == L'.') {
72079428148Smillert 		s = bws_iterator_inc(s, 1);
72179428148Smillert 		while (iswdigit(bws_get_iter_value(s)) &&
72279428148Smillert 		    *frac_len < MAX_NUM_SIZE) {
72379428148Smillert 			sfrac[*frac_len] = bws_get_iter_value(s);
72479428148Smillert 			s = bws_iterator_inc(s, 1);
72579428148Smillert 			*frac_len += 1;
72679428148Smillert 		}
72779428148Smillert 		sfrac[*frac_len] = 0;
72879428148Smillert 
72979428148Smillert 		while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
73079428148Smillert 			--(*frac_len);
73179428148Smillert 			sfrac[*frac_len] = L'\0';
73279428148Smillert 		}
73379428148Smillert 	}
73479428148Smillert 
73579428148Smillert 	setsuffix(bws_get_iter_value(s), si);
73679428148Smillert 
73779428148Smillert 	if ((*main_len + *frac_len) == 0)
73879428148Smillert 		*sign = 0;
73979428148Smillert 
74079428148Smillert 	return 0;
74179428148Smillert }
74279428148Smillert 
74379428148Smillert /*
74479428148Smillert  * Implements string sort.
74579428148Smillert  */
74679428148Smillert static int
74779428148Smillert wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
74879428148Smillert {
74979428148Smillert 
75079428148Smillert 	if (debug_sort) {
75179428148Smillert 		if (offset)
752f6f04aceSmmcc 			printf("; offset=%zu\n", offset);
75379428148Smillert 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
75479428148Smillert 		printf("(%zu)", BWSLEN(kv1->k));
75579428148Smillert 		bwsprintf(stdout, kv2->k, ", k2=<", ">");
75679428148Smillert 		printf("(%zu)", BWSLEN(kv2->k));
75779428148Smillert 	}
75879428148Smillert 
75979428148Smillert 	return bwscoll(kv1->k, kv2->k, offset);
76079428148Smillert }
76179428148Smillert 
76279428148Smillert /*
76379428148Smillert  * Compare two suffixes
76479428148Smillert  */
76579428148Smillert static inline int
76679428148Smillert cmpsuffix(unsigned char si1, unsigned char si2)
76779428148Smillert {
76879428148Smillert 	return (char)si1 - (char)si2;
76979428148Smillert }
77079428148Smillert 
77179428148Smillert /*
77279428148Smillert  * Implements numeric sort for -n and -h.
77379428148Smillert  */
77479428148Smillert static int
77579428148Smillert numcoll_impl(struct key_value *kv1, struct key_value *kv2,
77679428148Smillert     size_t offset __unused, bool use_suffix)
77779428148Smillert {
77879428148Smillert 	struct bwstring *s1, *s2;
77979428148Smillert 	wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
78079428148Smillert 	wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
78179428148Smillert 	int cmp_res, sign1, sign2;
78279428148Smillert 	size_t frac1, frac2, main1, main2;
78379428148Smillert 	unsigned char SI1, SI2;
78479428148Smillert 	bool e1, e2, key1_read, key2_read;
78579428148Smillert 
78679428148Smillert 	s1 = kv1->k;
78779428148Smillert 	s2 = kv2->k;
78879428148Smillert 	sign1 = sign2 = 0;
78979428148Smillert 	main1 = main2 = 0;
79079428148Smillert 	frac1 = frac2 = 0;
79179428148Smillert 
79279428148Smillert 	key1_read = key2_read = false;
79379428148Smillert 
79479428148Smillert 	if (debug_sort) {
79579428148Smillert 		bwsprintf(stdout, s1, "; k1=<", ">");
79679428148Smillert 		bwsprintf(stdout, s2, ", k2=<", ">");
79779428148Smillert 	}
79879428148Smillert 
79979428148Smillert 	if (s1 == s2)
80079428148Smillert 		return 0;
80179428148Smillert 
80279428148Smillert 	if (kv1->hint->status == HS_UNINITIALIZED) {
80379428148Smillert 		/* read the number from the string */
80479428148Smillert 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
80579428148Smillert 		key1_read = true;
80679428148Smillert 		kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
80779428148Smillert 		if (main1 < 1 && frac1 < 1)
80879428148Smillert 			kv1->hint->v.nh.empty=true;
80979428148Smillert 		kv1->hint->v.nh.si = SI1;
81079428148Smillert 		kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
81179428148Smillert 		    HS_INITIALIZED : HS_ERROR;
81279428148Smillert 		kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
81379428148Smillert 	}
81479428148Smillert 
81579428148Smillert 	if (kv2->hint->status == HS_UNINITIALIZED) {
81679428148Smillert 		/* read the number from the string */
81779428148Smillert 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
81879428148Smillert 		key2_read = true;
81979428148Smillert 		kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
82079428148Smillert 		if (main2 < 1 && frac2 < 1)
82179428148Smillert 			kv2->hint->v.nh.empty=true;
82279428148Smillert 		kv2->hint->v.nh.si = SI2;
82379428148Smillert 		kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
82479428148Smillert 		    HS_INITIALIZED : HS_ERROR;
82579428148Smillert 		kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
82679428148Smillert 	}
82779428148Smillert 
82879428148Smillert 	if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
82979428148Smillert 	    HS_INITIALIZED) {
83079428148Smillert 		unsigned long long n1, n2;
83179428148Smillert 		bool neg1, neg2;
83279428148Smillert 
83379428148Smillert 		e1 = kv1->hint->v.nh.empty;
83479428148Smillert 		e2 = kv2->hint->v.nh.empty;
83579428148Smillert 
83679428148Smillert 		if (e1 && e2)
83779428148Smillert 			return 0;
83879428148Smillert 
83979428148Smillert 		neg1 = kv1->hint->v.nh.neg;
84079428148Smillert 		neg2 = kv2->hint->v.nh.neg;
84179428148Smillert 
84279428148Smillert 		if (neg1 && !neg2)
84379428148Smillert 			return -1;
84479428148Smillert 		if (neg2 && !neg1)
84579428148Smillert 			return 1;
84679428148Smillert 
84779428148Smillert 		if (e1)
84879428148Smillert 			return neg2 ? 1 : -1;
84979428148Smillert 		else if (e2)
85079428148Smillert 			return neg1 ? -1 : 1;
85179428148Smillert 
85279428148Smillert 
85379428148Smillert 		if (use_suffix) {
85479428148Smillert 			cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
85579428148Smillert 			if (cmp_res)
85679428148Smillert 				return neg1 ? -cmp_res : cmp_res;
85779428148Smillert 		}
85879428148Smillert 
85979428148Smillert 		n1 = kv1->hint->v.nh.n1;
86079428148Smillert 		n2 = kv2->hint->v.nh.n1;
86179428148Smillert 		if (n1 < n2)
86279428148Smillert 			return neg1 ? 1 : -1;
86379428148Smillert 		else if (n1 > n2)
86479428148Smillert 			return neg1 ? -1 : 1;
86579428148Smillert 	}
86679428148Smillert 
86779428148Smillert 	/* read the numbers from the strings */
86879428148Smillert 	if (!key1_read)
86979428148Smillert 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
87079428148Smillert 	if (!key2_read)
87179428148Smillert 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
87279428148Smillert 
87379428148Smillert 	e1 = ((main1 + frac1) == 0);
87479428148Smillert 	e2 = ((main2 + frac2) == 0);
87579428148Smillert 
87679428148Smillert 	if (e1 && e2)
87779428148Smillert 		return 0;
87879428148Smillert 
87979428148Smillert 	/* we know the result if the signs are different */
88079428148Smillert 	if (sign1 < 0 && sign2 >= 0)
88179428148Smillert 		return -1;
88279428148Smillert 	if (sign1 >= 0 && sign2 < 0)
88379428148Smillert 		return 1;
88479428148Smillert 
88579428148Smillert 	if (e1)
88679428148Smillert 		return (sign2 < 0) ? +1 : -1;
88779428148Smillert 	else if (e2)
88879428148Smillert 		return (sign1 < 0) ? -1 : 1;
88979428148Smillert 
89079428148Smillert 	if (use_suffix) {
89179428148Smillert 		cmp_res = cmpsuffix(SI1, SI2);
89279428148Smillert 		if (cmp_res)
89379428148Smillert 			return (sign1 < 0) ? -cmp_res : cmp_res;
89479428148Smillert 	}
89579428148Smillert 
89679428148Smillert 	/* if both numbers are empty assume that the strings are equal */
89779428148Smillert 	if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
89879428148Smillert 		return 0;
89979428148Smillert 
90079428148Smillert 	/*
90179428148Smillert 	 * if the main part is of different size, we know the result
90279428148Smillert 	 * (because the leading zeros are removed)
90379428148Smillert 	 */
90479428148Smillert 	if (main1 < main2)
90579428148Smillert 		cmp_res = -1;
90679428148Smillert 	else if (main1 > main2)
90779428148Smillert 		cmp_res = +1;
90879428148Smillert 	/* if the sizes are equal then simple non-collate string compare gives the correct result */
90979428148Smillert 	else
91079428148Smillert 		cmp_res = wcscmp(smain1, smain2);
91179428148Smillert 
91279428148Smillert 	/* check fraction */
91379428148Smillert 	if (!cmp_res)
91479428148Smillert 		cmp_res = wcscmp(sfrac1, sfrac2);
91579428148Smillert 
91679428148Smillert 	if (!cmp_res)
91779428148Smillert 		return 0;
91879428148Smillert 
91979428148Smillert 	/* reverse result if the signs are negative */
92079428148Smillert 	if (sign1 < 0 && sign2 < 0)
92179428148Smillert 		cmp_res = -cmp_res;
92279428148Smillert 
92379428148Smillert 	return cmp_res;
92479428148Smillert }
92579428148Smillert 
92679428148Smillert /*
92779428148Smillert  * Implements numeric sort (-n).
92879428148Smillert  */
92979428148Smillert static int
93079428148Smillert numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
93179428148Smillert {
93279428148Smillert 	return numcoll_impl(kv1, kv2, offset, false);
93379428148Smillert }
93479428148Smillert 
93579428148Smillert /*
93679428148Smillert  * Implements 'human' numeric sort (-h).
93779428148Smillert  */
93879428148Smillert static int
93979428148Smillert hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
94079428148Smillert {
94179428148Smillert 	return numcoll_impl(kv1, kv2, offset, true);
94279428148Smillert }
94379428148Smillert 
94479428148Smillert /*
94579428148Smillert  * Implements random sort (-R).
94679428148Smillert  */
94779428148Smillert static int
94879428148Smillert randomcoll(struct key_value *kv1, struct key_value *kv2,
94979428148Smillert     size_t offset __unused)
95079428148Smillert {
95179428148Smillert 	struct bwstring *s1, *s2;
95279428148Smillert 	MD5_CTX ctx1, ctx2;
95379428148Smillert 	char *b1, *b2;
95479428148Smillert 
95579428148Smillert 	s1 = kv1->k;
95679428148Smillert 	s2 = kv2->k;
95779428148Smillert 
95879428148Smillert 	if (debug_sort) {
95979428148Smillert 		bwsprintf(stdout, s1, "; k1=<", ">");
96079428148Smillert 		bwsprintf(stdout, s2, ", k2=<", ">");
96179428148Smillert 	}
96279428148Smillert 
96379428148Smillert 	if (s1 == s2)
96479428148Smillert 		return 0;
96579428148Smillert 
96679428148Smillert 	memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
96779428148Smillert 	memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
96879428148Smillert 
96979428148Smillert 	MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
97079428148Smillert 	MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
97179428148Smillert 	b1 = MD5End(&ctx1, NULL);
97279428148Smillert 	b2 = MD5End(&ctx2, NULL);
97379428148Smillert 	if (b1 == NULL) {
97479428148Smillert 		if (b2 == NULL)
97579428148Smillert 			return 0;
97679428148Smillert 		else {
97779428148Smillert 			sort_free(b2);
97879428148Smillert 			return -1;
97979428148Smillert 		}
98079428148Smillert 	} else if (b2 == NULL) {
98179428148Smillert 		sort_free(b1);
98279428148Smillert 		return 1;
98379428148Smillert 	} else {
98479428148Smillert 		int cmp_res;
98579428148Smillert 
98679428148Smillert 		cmp_res = strcmp(b1, b2);
98779428148Smillert 		sort_free(b1);
98879428148Smillert 		sort_free(b2);
98979428148Smillert 
99079428148Smillert 		if (!cmp_res)
99179428148Smillert 			cmp_res = bwscoll(s1, s2, 0);
99279428148Smillert 
99379428148Smillert 		return cmp_res;
99479428148Smillert 	}
99579428148Smillert }
99679428148Smillert 
99779428148Smillert /*
99879428148Smillert  * Implements version sort (-V).
99979428148Smillert  */
100079428148Smillert static int
100179428148Smillert versioncoll(struct key_value *kv1, struct key_value *kv2,
100279428148Smillert     size_t offset __unused)
100379428148Smillert {
100479428148Smillert 	struct bwstring *s1, *s2;
100579428148Smillert 
100679428148Smillert 	s1 = kv1->k;
100779428148Smillert 	s2 = kv2->k;
100879428148Smillert 
100979428148Smillert 	if (debug_sort) {
101079428148Smillert 		bwsprintf(stdout, s1, "; k1=<", ">");
101179428148Smillert 		bwsprintf(stdout, s2, ", k2=<", ">");
101279428148Smillert 	}
101379428148Smillert 
101479428148Smillert 	if (s1 == s2)
101579428148Smillert 		return 0;
101679428148Smillert 
101779428148Smillert 	return vcmp(s1, s2);
101879428148Smillert }
101979428148Smillert 
102079428148Smillert /*
102179428148Smillert  * Check for minus infinity
102279428148Smillert  */
102379428148Smillert static inline bool
102479428148Smillert huge_minus(double d, int err1)
102579428148Smillert {
102679428148Smillert 	if (err1 == ERANGE)
102779428148Smillert 		if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
102879428148Smillert 			return 1;
102979428148Smillert 
103079428148Smillert 	return 0;
103179428148Smillert }
103279428148Smillert 
103379428148Smillert /*
103479428148Smillert  * Check for plus infinity
103579428148Smillert  */
103679428148Smillert static inline bool
103779428148Smillert huge_plus(double d, int err1)
103879428148Smillert {
103979428148Smillert 	if (err1 == ERANGE)
104079428148Smillert 		if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
104179428148Smillert 			return 1;
104279428148Smillert 
104379428148Smillert 	return 0;
104479428148Smillert }
104579428148Smillert 
104679428148Smillert /*
104779428148Smillert  * Check whether a function is a NAN
104879428148Smillert  */
104979428148Smillert static bool
105079428148Smillert is_nan(double d)
105179428148Smillert {
1052ec66c42dSmiod #if defined(NAN)
105379428148Smillert 	return (d == NAN || isnan(d));
1054ec66c42dSmiod #else
1055ec66c42dSmiod 	return (isnan(d));
1056ec66c42dSmiod #endif
105779428148Smillert }
105879428148Smillert 
105979428148Smillert /*
106079428148Smillert  * Compare two NANs
106179428148Smillert  */
106279428148Smillert static int
106379428148Smillert cmp_nans(double d1, double d2)
106479428148Smillert {
106579428148Smillert 	if (d1 == d2)
106679428148Smillert 		return 0;
106779428148Smillert 	return d1 < d2 ? -1 : 1;
106879428148Smillert }
106979428148Smillert 
107079428148Smillert /*
107179428148Smillert  * Implements general numeric sort (-g).
107279428148Smillert  */
107379428148Smillert static int
107479428148Smillert gnumcoll(struct key_value *kv1, struct key_value *kv2,
107579428148Smillert     size_t offset __unused)
107679428148Smillert {
107779428148Smillert 	double d1, d2;
107879428148Smillert 	int err1, err2;
107979428148Smillert 	bool empty1, empty2, key1_read, key2_read;
108079428148Smillert 
108179428148Smillert 	d1 = d2 = 0;
108279428148Smillert 	err1 = err2 = 0;
108379428148Smillert 	key1_read = key2_read = false;
108479428148Smillert 
108579428148Smillert 	if (debug_sort) {
108679428148Smillert 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
108779428148Smillert 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
108879428148Smillert 	}
108979428148Smillert 
109079428148Smillert 	if (kv1->hint->status == HS_UNINITIALIZED) {
109179428148Smillert 		errno = 0;
109279428148Smillert 		d1 = bwstod(kv1->k, &empty1);
109379428148Smillert 		err1 = errno;
109479428148Smillert 
1095cbea14daSmillert 		if (empty1) {
109679428148Smillert 			kv1->hint->v.gh.notnum = true;
1097cbea14daSmillert 			kv1->hint->status = HS_INITIALIZED;
1098cbea14daSmillert 		} else if (err1 == 0) {
109979428148Smillert 			kv1->hint->v.gh.d = d1;
110079428148Smillert 			kv1->hint->v.gh.nan = is_nan(d1);
110179428148Smillert 			kv1->hint->status = HS_INITIALIZED;
110279428148Smillert 		} else
110379428148Smillert 			kv1->hint->status = HS_ERROR;
110479428148Smillert 
110579428148Smillert 		key1_read = true;
110679428148Smillert 	}
110779428148Smillert 
110879428148Smillert 	if (kv2->hint->status == HS_UNINITIALIZED) {
110979428148Smillert 		errno = 0;
111079428148Smillert 		d2 = bwstod(kv2->k, &empty2);
111179428148Smillert 		err2 = errno;
111279428148Smillert 
1113cbea14daSmillert 		if (empty2) {
111479428148Smillert 			kv2->hint->v.gh.notnum = true;
1115cbea14daSmillert 			kv2->hint->status = HS_INITIALIZED;
1116cbea14daSmillert 		} else if (err2 == 0) {
111779428148Smillert 			kv2->hint->v.gh.d = d2;
111879428148Smillert 			kv2->hint->v.gh.nan = is_nan(d2);
111979428148Smillert 			kv2->hint->status = HS_INITIALIZED;
112079428148Smillert 		} else
112179428148Smillert 			kv2->hint->status = HS_ERROR;
112279428148Smillert 
112379428148Smillert 		key2_read = true;
112479428148Smillert 	}
112579428148Smillert 
112679428148Smillert 	if (kv1->hint->status == HS_INITIALIZED &&
112779428148Smillert 	    kv2->hint->status == HS_INITIALIZED) {
1128cbea14daSmillert #ifdef GNUSORT_COMPATIBILITY
112979428148Smillert 		if (kv1->hint->v.gh.notnum)
113079428148Smillert 			return kv2->hint->v.gh.notnum ? 0 : -1;
113179428148Smillert 		else if (kv2->hint->v.gh.notnum)
113279428148Smillert 			return 1;
1133cbea14daSmillert #else
1134cbea14daSmillert 		if (kv1->hint->v.gh.notnum && kv2->hint->v.gh.notnum)
1135cbea14daSmillert 			return 0;
1136cbea14daSmillert #endif
113779428148Smillert 
113879428148Smillert 		if (kv1->hint->v.gh.nan)
113979428148Smillert 			return kv2->hint->v.gh.nan ?
114079428148Smillert 			    cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1;
114179428148Smillert 		else if (kv2->hint->v.gh.nan)
114279428148Smillert 			return 1;
114379428148Smillert 
114479428148Smillert 		d1 = kv1->hint->v.gh.d;
114579428148Smillert 		d2 = kv2->hint->v.gh.d;
114679428148Smillert 
114779428148Smillert 		if (d1 < d2)
114879428148Smillert 			return -1;
114979428148Smillert 		else if (d1 > d2)
115079428148Smillert 			return 1;
115179428148Smillert 		else
115279428148Smillert 			return 0;
115379428148Smillert 	}
115479428148Smillert 
115579428148Smillert 	if (!key1_read) {
115679428148Smillert 		errno = 0;
115779428148Smillert 		d1 = bwstod(kv1->k, &empty1);
115879428148Smillert 		err1 = errno;
115979428148Smillert 	}
116079428148Smillert 
116179428148Smillert 	if (!key2_read) {
116279428148Smillert 		errno = 0;
116379428148Smillert 		d2 = bwstod(kv2->k, &empty2);
116479428148Smillert 		err2 = errno;
116579428148Smillert 	}
116679428148Smillert 
1167cbea14daSmillert 	/* Non-value case */
1168cbea14daSmillert #ifdef GNUSORT_COMPATIBILITY
116979428148Smillert 	if (empty1)
117079428148Smillert 		return empty2 ? 0 : -1;
117179428148Smillert 	else if (empty2)
117279428148Smillert 		return 1;
1173cbea14daSmillert #else
1174cbea14daSmillert 	if (empty1 && empty2)
1175cbea14daSmillert 		return 0;
1176cbea14daSmillert #endif
117779428148Smillert 
117879428148Smillert 	/* NAN case */
117979428148Smillert 	if (is_nan(d1))
118079428148Smillert 		return is_nan(d2) ? cmp_nans(d1, d2) : -1;
118179428148Smillert 	else if (is_nan(d2))
118279428148Smillert 		return 1;
118379428148Smillert 
118479428148Smillert 	/* Infinities */
118579428148Smillert 	if (err1 == ERANGE || err2 == ERANGE) {
118679428148Smillert 		/* Minus infinity case */
118779428148Smillert 		if (huge_minus(d1, err1)) {
118879428148Smillert 			if (huge_minus(d2, err2)) {
118979428148Smillert 				if (d1 == d2)
119079428148Smillert 					return 0;
119179428148Smillert 				return d1 < d2 ? -1 : 1;
119279428148Smillert 			} else
119379428148Smillert 				return -1;
119479428148Smillert 
119579428148Smillert 		} else if (huge_minus(d2, err2)) {
119679428148Smillert 			if (huge_minus(d1, err1)) {
119779428148Smillert 				if (d1 == d2)
119879428148Smillert 					return 0;
119979428148Smillert 				return d1 < d2 ? -1 : 1;
120079428148Smillert 			} else
120179428148Smillert 				return 1;
120279428148Smillert 		}
120379428148Smillert 
120479428148Smillert 		/* Plus infinity case */
120579428148Smillert 		if (huge_plus(d1, err1)) {
120679428148Smillert 			if (huge_plus(d2, err2)) {
120779428148Smillert 				if (d1 == d2)
120879428148Smillert 					return 0;
120979428148Smillert 				return d1 < d2 ? -1 : 1;
121079428148Smillert 			} else
121179428148Smillert 				return 1;
121279428148Smillert 		} else if (huge_plus(d2, err2)) {
121379428148Smillert 			if (huge_plus(d1, err1)) {
121479428148Smillert 				if (d1 == d2)
121579428148Smillert 					return 0;
121679428148Smillert 				return d1 < d2 ? -1 : 1;
121779428148Smillert 			} else
121879428148Smillert 				return -1;
121979428148Smillert 		}
122079428148Smillert 	}
122179428148Smillert 
122279428148Smillert 	if (d1 == d2)
122379428148Smillert 		return 0;
122479428148Smillert 	return d1 < d2 ? -1 : 1;
122579428148Smillert }
122679428148Smillert 
122779428148Smillert /*
122879428148Smillert  * Implements month sort (-M).
122979428148Smillert  */
123079428148Smillert static int
123179428148Smillert monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
123279428148Smillert {
123379428148Smillert 	int val1, val2;
123479428148Smillert 	bool key1_read, key2_read;
123579428148Smillert 
123679428148Smillert 	val1 = val2 = 0;
123779428148Smillert 	key1_read = key2_read = false;
123879428148Smillert 
123979428148Smillert 	if (debug_sort) {
124079428148Smillert 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
124179428148Smillert 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
124279428148Smillert 	}
124379428148Smillert 
124479428148Smillert 	if (kv1->hint->status == HS_UNINITIALIZED) {
124579428148Smillert 		kv1->hint->v.Mh.m = bws_month_score(kv1->k);
124679428148Smillert 		key1_read = true;
124779428148Smillert 		kv1->hint->status = HS_INITIALIZED;
124879428148Smillert 	}
124979428148Smillert 
125079428148Smillert 	if (kv2->hint->status == HS_UNINITIALIZED) {
125179428148Smillert 		kv2->hint->v.Mh.m = bws_month_score(kv2->k);
125279428148Smillert 		key2_read = true;
125379428148Smillert 		kv2->hint->status = HS_INITIALIZED;
125479428148Smillert 	}
125579428148Smillert 
125679428148Smillert 	if (kv1->hint->status == HS_INITIALIZED) {
125779428148Smillert 		val1 = kv1->hint->v.Mh.m;
125879428148Smillert 		key1_read = true;
125979428148Smillert 	}
126079428148Smillert 
126179428148Smillert 	if (kv2->hint->status == HS_INITIALIZED) {
126279428148Smillert 		val2 = kv2->hint->v.Mh.m;
126379428148Smillert 		key2_read = true;
126479428148Smillert 	}
126579428148Smillert 
126679428148Smillert 	if (!key1_read)
126779428148Smillert 		val1 = bws_month_score(kv1->k);
126879428148Smillert 	if (!key2_read)
126979428148Smillert 		val2 = bws_month_score(kv2->k);
127079428148Smillert 
127179428148Smillert 	if (val1 == val2)
127279428148Smillert 		return 0;
127379428148Smillert 	return val1 < val2 ? -1 : 1;
127479428148Smillert }
1275