xref: /openbsd-src/usr.bin/sort/coll.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*	$OpenBSD: coll.c,v 1.11 2015/12/11 21:41:51 mmcc Exp $	*/
2 
3 /*-
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/types.h>
31 
32 #include <errno.h>
33 #include <err.h>
34 #include <langinfo.h>
35 #include <limits.h>
36 #include <math.h>
37 #include <md5.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <wchar.h>
41 #include <wctype.h>
42 
43 #include "coll.h"
44 #include "vsort.h"
45 
46 struct key_specs *keys;
47 size_t keys_num = 0;
48 
49 wint_t symbol_decimal_point = L'.';
50 /* there is no default thousands separator in collate rules: */
51 wint_t symbol_thousands_sep = 0;
52 wint_t symbol_negative_sign = L'-';
53 wint_t symbol_positive_sign = L'+';
54 
55 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
56 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
57 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
58 static int numcoll(struct key_value*, struct key_value *, size_t offset);
59 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
60 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
61 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
62 
63 /*
64  * Allocate keys array
65  */
66 struct keys_array *
67 keys_array_alloc(void)
68 {
69 	struct keys_array *ka;
70 	size_t sz;
71 
72 	sz = keys_array_size();
73 	ka = sort_calloc(1, sz);
74 
75 	return ka;
76 }
77 
78 /*
79  * Calculate whether we need key hint space
80  */
81 static size_t
82 key_hint_size(void)
83 {
84 	return need_hint ? sizeof(struct key_hint) : 0;
85 }
86 
87 /*
88  * Calculate keys array size
89  */
90 size_t
91 keys_array_size(void)
92 {
93 	return keys_num * (sizeof(struct key_value) + key_hint_size());
94 }
95 
96 /*
97  * Clean data of keys array
98  */
99 void
100 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
101 {
102 	if (ka) {
103 		size_t i;
104 
105 		for (i = 0; i < keys_num; ++i)
106 			if (ka->key[i].k && ka->key[i].k != s)
107 				bwsfree(ka->key[i].k);
108 		memset(ka, 0, keys_array_size());
109 	}
110 }
111 
112 /*
113  * Set value of a key in the keys set
114  */
115 void
116 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
117 {
118 	if (ka && keys_num > ind) {
119 		struct key_value *kv;
120 
121 		kv = &(ka->key[ind]);
122 
123 		if (kv->k != s)
124 			bwsfree(kv->k);
125 		kv->k = s;
126 	}
127 }
128 
129 /*
130  * Initialize a sort list item
131  */
132 struct sort_list_item *
133 sort_list_item_alloc(void)
134 {
135 	struct sort_list_item *si;
136 	size_t sz;
137 
138 	sz = sizeof(struct sort_list_item) + keys_array_size();
139 	si = sort_calloc(1, sz);
140 
141 	return si;
142 }
143 
144 size_t
145 sort_list_item_size(struct sort_list_item *si)
146 {
147 	size_t i, ret = 0;
148 
149 	if (si) {
150 		ret = sizeof(struct sort_list_item) + keys_array_size();
151 		if (si->str)
152 			ret += bws_memsize(si->str);
153 		for (i = 0; i < keys_num; ++i) {
154 			struct key_value *kv;
155 
156 			kv = &(si->ka.key[i]);
157 
158 			if (kv->k != si->str)
159 				ret += bws_memsize(kv->k);
160 		}
161 	}
162 	return ret;
163 }
164 
165 /*
166  * Calculate key for a sort list item
167  */
168 static void
169 sort_list_item_make_key(struct sort_list_item *si)
170 {
171 	preproc(si->str, &(si->ka));
172 }
173 
174 /*
175  * Set value of a sort list item.
176  * Return combined string and keys memory size.
177  */
178 void
179 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
180 {
181 	if (si) {
182 		clean_keys_array(si->str, &(si->ka));
183 		if (si->str) {
184 			if (si->str == str) {
185 				/* we are trying to reset the same string */
186 				return;
187 			} else {
188 				bwsfree(si->str);
189 				si->str = NULL;
190 			}
191 		}
192 		si->str = str;
193 		sort_list_item_make_key(si);
194 	}
195 }
196 
197 /*
198  * De-allocate a sort list item object memory
199  */
200 void
201 sort_list_item_clean(struct sort_list_item *si)
202 {
203 	if (si) {
204 		clean_keys_array(si->str, &(si->ka));
205 		if (si->str) {
206 			bwsfree(si->str);
207 			si->str = NULL;
208 		}
209 	}
210 }
211 
212 /*
213  * Skip columns according to specs
214  */
215 static size_t
216 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
217     bool skip_blanks, bool *empty_key)
218 {
219 	if (cols < 1)
220 		return BWSLEN(s) + 1;
221 
222 	if (skip_blanks)
223 		while (start < BWSLEN(s) && iswblank(BWS_GET(s, start)))
224 			++start;
225 
226 	while (start < BWSLEN(s) && cols > 1) {
227 		--cols;
228 		++start;
229 	}
230 
231 	if (start >= BWSLEN(s))
232 		*empty_key = true;
233 
234 	return start;
235 }
236 
237 /*
238  * Skip fields according to specs
239  */
240 static size_t
241 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
242 {
243 	if (fields < 2) {
244 		if (BWSLEN(s) == 0)
245 			*empty_field = true;
246 		return 0;
247 	} else if (!(sort_opts_vals.tflag)) {
248 		size_t cpos = 0;
249 		bool pb = true;
250 
251 		while (cpos < BWSLEN(s)) {
252 			bool isblank;
253 
254 			isblank = iswblank(BWS_GET(s, cpos));
255 
256 			if (isblank && !pb) {
257 				--fields;
258 				if (fields <= 1)
259 					return cpos;
260 			}
261 			pb = isblank;
262 			++cpos;
263 		}
264 		if (fields > 1)
265 			*empty_field = true;
266 		return cpos;
267 	} else {
268 		size_t cpos = 0;
269 
270 		while (cpos < BWSLEN(s)) {
271 			if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) {
272 				--fields;
273 				if (fields <= 1)
274 					return cpos + 1;
275 			}
276 			++cpos;
277 		}
278 		if (fields > 1)
279 			*empty_field = true;
280 		return cpos;
281 	}
282 }
283 
284 /*
285  * Find fields start
286  */
287 static void
288 find_field_start(const struct bwstring *s, struct key_specs *ks,
289     size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
290 {
291 	*field_start = skip_fields_to_start(s, ks->f1, empty_field);
292 	if (!*empty_field)
293 		*key_start = skip_cols_to_start(s, ks->c1, *field_start,
294 		    ks->pos1b, empty_key);
295 	else
296 		*empty_key = true;
297 }
298 
299 /*
300  * Find end key position
301  */
302 static size_t
303 find_field_end(const struct bwstring *s, struct key_specs *ks)
304 {
305 	size_t f2, next_field_start, pos_end;
306 	bool empty_field, empty_key;
307 
308 	empty_field = false;
309 	empty_key = false;
310 	f2 = ks->f2;
311 
312 	if (f2 == 0)
313 		return BWSLEN(s) + 1;
314 	else {
315 		if (ks->c2 == 0) {
316 			next_field_start = skip_fields_to_start(s, f2 + 1,
317 			    &empty_field);
318 			if ((next_field_start > 0) && sort_opts_vals.tflag &&
319 			    ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
320 			    next_field_start - 1)))
321 				--next_field_start;
322 		} else
323 			next_field_start = skip_fields_to_start(s, f2,
324 			    &empty_field);
325 	}
326 
327 	if (empty_field || (next_field_start >= BWSLEN(s)))
328 		return BWSLEN(s) + 1;
329 
330 	if (ks->c2) {
331 		pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
332 		    ks->pos2b, &empty_key);
333 		if (pos_end < BWSLEN(s))
334 			++pos_end;
335 	} else
336 		pos_end = next_field_start;
337 
338 	return pos_end;
339 }
340 
341 /*
342  * Cut a field according to the key specs
343  */
344 static struct bwstring *
345 cut_field(const struct bwstring *s, struct key_specs *ks)
346 {
347 	struct bwstring *ret = NULL;
348 
349 	if (s && ks) {
350 		size_t field_start, key_end, key_start, sz;
351 		bool empty_field, empty_key;
352 
353 		field_start = 0;
354 		key_start = 0;
355 		empty_field = false;
356 		empty_key = false;
357 
358 		find_field_start(s, ks, &field_start, &key_start,
359 		    &empty_field, &empty_key);
360 
361 		if (empty_key)
362 			sz = 0;
363 		else {
364 			key_end = find_field_end(s, ks);
365 			sz = (key_end < key_start) ? 0 : (key_end - key_start);
366 		}
367 
368 		ret = bwsalloc(sz);
369 		if (sz)
370 			bwsnocpy(ret, s, key_start, sz);
371 	} else
372 		ret = bwsalloc(0);
373 
374 	return ret;
375 }
376 
377 /*
378  * Preprocesses a line applying the necessary transformations
379  * specified by command line options and returns the preprocessed
380  * string, which can be used to compare.
381  */
382 int
383 preproc(struct bwstring *s, struct keys_array *ka)
384 {
385 	if (sort_opts_vals.kflag) {
386 		size_t i;
387 		for (i = 0; i < keys_num; i++) {
388 			struct bwstring *key;
389 			struct key_specs *kspecs;
390 			struct sort_mods *sm;
391 
392 			kspecs = &(keys[i]);
393 			key = cut_field(s, kspecs);
394 
395 			sm = &(kspecs->sm);
396 			if (sm->dflag)
397 				key = dictionary_order(key);
398 			else if (sm->iflag)
399 				key = ignore_nonprinting(key);
400 			if (sm->fflag || sm->Mflag)
401 				key = ignore_case(key);
402 
403 			set_key_on_keys_array(ka, key, i);
404 		}
405 	} else {
406 		struct bwstring *ret = NULL;
407 		struct sort_mods *sm = default_sort_mods;
408 
409 #ifdef GNUSORT_COMPATIBILITY
410 		if (sm->bflag) {
411 			if (ret == NULL)
412 				ret = bwsdup(s);
413 			ret = ignore_leading_blanks(ret);
414 		}
415 #endif
416 		if (sm->dflag) {
417 			if (ret == NULL)
418 				ret = bwsdup(s);
419 			ret = dictionary_order(ret);
420 		} else if (sm->iflag) {
421 			if (ret == NULL)
422 				ret = bwsdup(s);
423 			ret = ignore_nonprinting(ret);
424 		}
425 		if (sm->fflag || sm->Mflag) {
426 			if (ret == NULL)
427 				ret = bwsdup(s);
428 			ret = ignore_case(ret);
429 		}
430 		if (ret == NULL)
431 			set_key_on_keys_array(ka, s, 0);
432 		else
433 			set_key_on_keys_array(ka, ret, 0);
434 	}
435 
436 	return 0;
437 }
438 
439 cmpcoll_t
440 get_sort_func(struct sort_mods *sm)
441 {
442 	if (sm->nflag)
443 		return numcoll;
444 	else if (sm->hflag)
445 		return hnumcoll;
446 	else if (sm->gflag)
447 		return gnumcoll;
448 	else if (sm->Mflag)
449 		return monthcoll;
450 	else if (sm->Rflag)
451 		return randomcoll;
452 	else if (sm->Vflag)
453 		return versioncoll;
454 	else
455 		return wstrcoll;
456 }
457 
458 /*
459  * Compares the given strings.  Returns a positive number if
460  * the first precedes the second, a negative number if the second is
461  * the preceding one, and zero if they are equal.  This function calls
462  * the underlying collate functions, which done the actual comparison.
463  */
464 int
465 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
466 {
467 	struct sort_mods *sm;
468 	int res = 0;
469 	size_t i;
470 
471 	for (i = 0; i < keys_num; ++i) {
472 		sm = &(keys[i].sm);
473 
474 		if (sm->rflag)
475 			res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
476 		else
477 			res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
478 
479 		if (res)
480 			break;
481 
482 		/* offset applies to only the first key */
483 		offset = 0;
484 	}
485 	return res;
486 }
487 
488 /*
489  * Compare two strings.
490  * Plain symbol-by-symbol comparison.
491  */
492 int
493 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
494 {
495 	if (default_sort_mods->rflag) {
496 		const struct bwstring *tmp;
497 
498 		tmp = s1;
499 		s1 = s2;
500 		s2 = tmp;
501 	}
502 
503 	return bwscoll(s1, s2, 0);
504 }
505 
506 /*
507  * Compare a string and a sort list item, according to the sort specs.
508  */
509 int
510 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
511 {
512 	struct keys_array *ka1;
513 	int ret = 0;
514 
515 	ka1 = keys_array_alloc();
516 
517 	preproc(str1, ka1);
518 
519 	sort_list_item_make_key(*ss2);
520 
521 	if (debug_sort) {
522 		bwsprintf(stdout, str1, "; s1=<", ">");
523 		bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
524 	}
525 
526 	ret = key_coll(ka1, &((*ss2)->ka), 0);
527 
528 	if (debug_sort)
529 		printf("; cmp1=%d", ret);
530 
531 	clean_keys_array(str1, ka1);
532 	sort_free(ka1);
533 
534 	if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
535 		ret = top_level_str_coll(str1, ((*ss2)->str));
536 		if (debug_sort)
537 			printf("; cmp2=%d", ret);
538 	}
539 
540 	if (debug_sort)
541 		putchar('\n');
542 
543 	return ret;
544 }
545 
546 /*
547  * Compare two sort list items, according to the sort specs.
548  */
549 int
550 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
551     size_t offset)
552 {
553 	int ret;
554 
555 	ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
556 
557 	if (debug_sort) {
558 		if (offset)
559 			printf("; offset=%zu", offset);
560 		bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
561 		bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
562 		printf("; cmp1=%d\n", ret);
563 	}
564 
565 	if (ret)
566 		return ret;
567 
568 	if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
569 		ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
570 		if (debug_sort)
571 			printf("; cmp2=%d\n", ret);
572 	}
573 
574 	return ret;
575 }
576 
577 /*
578  * Compare two sort list items, according to the sort specs.
579  */
580 int
581 list_coll(const void *ss1, const void *ss2)
582 {
583 	return list_coll_offset((struct sort_list_item **)ss1,
584 	    (struct sort_list_item **)ss2, 0);
585 }
586 
587 #define LSCDEF(N)											\
588 static int												\
589 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)					\
590 {													\
591 													\
592 	return list_coll_offset(ss1, ss2, N);								\
593 }
594 
595 LSCDEF(0)
596 LSCDEF(1)
597 LSCDEF(2)
598 LSCDEF(3)
599 LSCDEF(4)
600 LSCDEF(5)
601 LSCDEF(6)
602 LSCDEF(7)
603 LSCDEF(8)
604 LSCDEF(9)
605 LSCDEF(10)
606 LSCDEF(11)
607 LSCDEF(12)
608 LSCDEF(13)
609 LSCDEF(14)
610 LSCDEF(15)
611 LSCDEF(16)
612 LSCDEF(17)
613 LSCDEF(18)
614 LSCDEF(19)
615 LSCDEF(20)
616 
617 listcoll_t
618 get_list_call_func(size_t offset)
619 {
620 	static const listcoll_t lsarray[] = { list_coll_0, list_coll_1,
621 	    list_coll_2, list_coll_3, list_coll_4, list_coll_5,
622 	    list_coll_6, list_coll_7, list_coll_8, list_coll_9,
623 	    list_coll_10, list_coll_11, list_coll_12, list_coll_13,
624 	    list_coll_14, list_coll_15, list_coll_16, list_coll_17,
625 	    list_coll_18, list_coll_19, list_coll_20 };
626 
627 	if (offset <= 20)
628 		return lsarray[offset];
629 
630 	return list_coll_0;
631 }
632 
633 /*
634  * Compare two sort list items, only by their original string.
635  */
636 int
637 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
638 {
639 	return top_level_str_coll(((*ss1)->str), ((*ss2)->str));
640 }
641 
642 /*
643  * Maximum size of a number in the string (before or after decimal point)
644  */
645 #define MAX_NUM_SIZE (128)
646 
647 /*
648  * Set suffix value
649  */
650 static void
651 setsuffix(wchar_t c, unsigned char *si)
652 {
653 	switch (c){
654 	case L'k':
655 	case L'K':
656 		*si = 1;
657 		break;
658 	case L'M':
659 		*si = 2;
660 		break;
661 	case L'G':
662 		*si = 3;
663 		break;
664 	case L'T':
665 		*si = 4;
666 		break;
667 	case L'P':
668 		*si = 5;
669 		break;
670 	case L'E':
671 		*si = 6;
672 		break;
673 	case L'Z':
674 		*si = 7;
675 		break;
676 	case L'Y':
677 		*si = 8;
678 		break;
679 	default:
680 		*si = 0;
681 	};
682 }
683 
684 /*
685  * Read string s and parse the string into a fixed-decimal-point number.
686  * sign equals -1 if the number is negative (explicit plus is not allowed,
687  * according to GNU sort's "info sort".
688  * The number part before decimal point is in the smain, after the decimal
689  * point is in sfrac, tail is the pointer to the remainder of the string.
690  */
691 static int
692 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
693 {
694 	bwstring_iterator s;
695 
696 	s = bws_begin(s0);
697 
698 	/* always end the fraction with zero, even if we have no fraction */
699 	sfrac[0] = 0;
700 
701 	while (iswblank(bws_get_iter_value(s)))
702 		s = bws_iterator_inc(s, 1);
703 
704 	if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
705 		*sign = -1;
706 		s = bws_iterator_inc(s, 1);
707 	}
708 
709 	// This is '0', not '\0', do not change this
710 	while (iswdigit(bws_get_iter_value(s)) &&
711 	    (bws_get_iter_value(s) == L'0'))
712 		s = bws_iterator_inc(s, 1);
713 
714 	while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
715 		if (iswdigit(bws_get_iter_value(s))) {
716 			smain[*main_len] = bws_get_iter_value(s);
717 			s = bws_iterator_inc(s, 1);
718 			*main_len += 1;
719 		} else if (symbol_thousands_sep &&
720 		    (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
721 			s = bws_iterator_inc(s, 1);
722 		else
723 			break;
724 	}
725 
726 	smain[*main_len] = 0;
727 
728 	if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
729 		s = bws_iterator_inc(s, 1);
730 		while (iswdigit(bws_get_iter_value(s)) &&
731 		    *frac_len < MAX_NUM_SIZE) {
732 			sfrac[*frac_len] = bws_get_iter_value(s);
733 			s = bws_iterator_inc(s, 1);
734 			*frac_len += 1;
735 		}
736 		sfrac[*frac_len] = 0;
737 
738 		while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
739 			--(*frac_len);
740 			sfrac[*frac_len] = L'\0';
741 		}
742 	}
743 
744 	setsuffix(bws_get_iter_value(s), si);
745 
746 	if ((*main_len + *frac_len) == 0)
747 		*sign = 0;
748 
749 	return 0;
750 }
751 
752 /*
753  * Implements string sort.
754  */
755 static int
756 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
757 {
758 
759 	if (debug_sort) {
760 		if (offset)
761 			printf("; offset=%zu\n", offset);
762 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
763 		printf("(%zu)", BWSLEN(kv1->k));
764 		bwsprintf(stdout, kv2->k, ", k2=<", ">");
765 		printf("(%zu)", BWSLEN(kv2->k));
766 	}
767 
768 	return bwscoll(kv1->k, kv2->k, offset);
769 }
770 
771 /*
772  * Compare two suffixes
773  */
774 static inline int
775 cmpsuffix(unsigned char si1, unsigned char si2)
776 {
777 	return (char)si1 - (char)si2;
778 }
779 
780 /*
781  * Implements numeric sort for -n and -h.
782  */
783 static int
784 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
785     size_t offset __unused, bool use_suffix)
786 {
787 	struct bwstring *s1, *s2;
788 	wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
789 	wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
790 	int cmp_res, sign1, sign2;
791 	size_t frac1, frac2, main1, main2;
792 	unsigned char SI1, SI2;
793 	bool e1, e2, key1_read, key2_read;
794 
795 	s1 = kv1->k;
796 	s2 = kv2->k;
797 	sign1 = sign2 = 0;
798 	main1 = main2 = 0;
799 	frac1 = frac2 = 0;
800 
801 	key1_read = key2_read = false;
802 
803 	if (debug_sort) {
804 		bwsprintf(stdout, s1, "; k1=<", ">");
805 		bwsprintf(stdout, s2, ", k2=<", ">");
806 	}
807 
808 	if (s1 == s2)
809 		return 0;
810 
811 	if (kv1->hint->status == HS_UNINITIALIZED) {
812 		/* read the number from the string */
813 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
814 		key1_read = true;
815 		kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
816 		if (main1 < 1 && frac1 < 1)
817 			kv1->hint->v.nh.empty=true;
818 		kv1->hint->v.nh.si = SI1;
819 		kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
820 		    HS_INITIALIZED : HS_ERROR;
821 		kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
822 	}
823 
824 	if (kv2->hint->status == HS_UNINITIALIZED) {
825 		/* read the number from the string */
826 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
827 		key2_read = true;
828 		kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
829 		if (main2 < 1 && frac2 < 1)
830 			kv2->hint->v.nh.empty=true;
831 		kv2->hint->v.nh.si = SI2;
832 		kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
833 		    HS_INITIALIZED : HS_ERROR;
834 		kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
835 	}
836 
837 	if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
838 	    HS_INITIALIZED) {
839 		unsigned long long n1, n2;
840 		bool neg1, neg2;
841 
842 		e1 = kv1->hint->v.nh.empty;
843 		e2 = kv2->hint->v.nh.empty;
844 
845 		if (e1 && e2)
846 			return 0;
847 
848 		neg1 = kv1->hint->v.nh.neg;
849 		neg2 = kv2->hint->v.nh.neg;
850 
851 		if (neg1 && !neg2)
852 			return -1;
853 		if (neg2 && !neg1)
854 			return 1;
855 
856 		if (e1)
857 			return neg2 ? 1 : -1;
858 		else if (e2)
859 			return neg1 ? -1 : 1;
860 
861 
862 		if (use_suffix) {
863 			cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
864 			if (cmp_res)
865 				return neg1 ? -cmp_res : cmp_res;
866 		}
867 
868 		n1 = kv1->hint->v.nh.n1;
869 		n2 = kv2->hint->v.nh.n1;
870 		if (n1 < n2)
871 			return neg1 ? 1 : -1;
872 		else if (n1 > n2)
873 			return neg1 ? -1 : 1;
874 	}
875 
876 	/* read the numbers from the strings */
877 	if (!key1_read)
878 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
879 	if (!key2_read)
880 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
881 
882 	e1 = ((main1 + frac1) == 0);
883 	e2 = ((main2 + frac2) == 0);
884 
885 	if (e1 && e2)
886 		return 0;
887 
888 	/* we know the result if the signs are different */
889 	if (sign1 < 0 && sign2 >= 0)
890 		return -1;
891 	if (sign1 >= 0 && sign2 < 0)
892 		return 1;
893 
894 	if (e1)
895 		return (sign2 < 0) ? +1 : -1;
896 	else if (e2)
897 		return (sign1 < 0) ? -1 : 1;
898 
899 	if (use_suffix) {
900 		cmp_res = cmpsuffix(SI1, SI2);
901 		if (cmp_res)
902 			return (sign1 < 0) ? -cmp_res : cmp_res;
903 	}
904 
905 	/* if both numbers are empty assume that the strings are equal */
906 	if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
907 		return 0;
908 
909 	/*
910 	 * if the main part is of different size, we know the result
911 	 * (because the leading zeros are removed)
912 	 */
913 	if (main1 < main2)
914 		cmp_res = -1;
915 	else if (main1 > main2)
916 		cmp_res = +1;
917 	/* if the sizes are equal then simple non-collate string compare gives the correct result */
918 	else
919 		cmp_res = wcscmp(smain1, smain2);
920 
921 	/* check fraction */
922 	if (!cmp_res)
923 		cmp_res = wcscmp(sfrac1, sfrac2);
924 
925 	if (!cmp_res)
926 		return 0;
927 
928 	/* reverse result if the signs are negative */
929 	if (sign1 < 0 && sign2 < 0)
930 		cmp_res = -cmp_res;
931 
932 	return cmp_res;
933 }
934 
935 /*
936  * Implements numeric sort (-n).
937  */
938 static int
939 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
940 {
941 	return numcoll_impl(kv1, kv2, offset, false);
942 }
943 
944 /*
945  * Implements 'human' numeric sort (-h).
946  */
947 static int
948 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
949 {
950 	return numcoll_impl(kv1, kv2, offset, true);
951 }
952 
953 /*
954  * Implements random sort (-R).
955  */
956 static int
957 randomcoll(struct key_value *kv1, struct key_value *kv2,
958     size_t offset __unused)
959 {
960 	struct bwstring *s1, *s2;
961 	MD5_CTX ctx1, ctx2;
962 	char *b1, *b2;
963 
964 	s1 = kv1->k;
965 	s2 = kv2->k;
966 
967 	if (debug_sort) {
968 		bwsprintf(stdout, s1, "; k1=<", ">");
969 		bwsprintf(stdout, s2, ", k2=<", ">");
970 	}
971 
972 	if (s1 == s2)
973 		return 0;
974 
975 	memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
976 	memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
977 
978 	MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
979 	MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
980 	b1 = MD5End(&ctx1, NULL);
981 	b2 = MD5End(&ctx2, NULL);
982 	if (b1 == NULL) {
983 		if (b2 == NULL)
984 			return 0;
985 		else {
986 			sort_free(b2);
987 			return -1;
988 		}
989 	} else if (b2 == NULL) {
990 		sort_free(b1);
991 		return 1;
992 	} else {
993 		int cmp_res;
994 
995 		cmp_res = strcmp(b1, b2);
996 		sort_free(b1);
997 		sort_free(b2);
998 
999 		if (!cmp_res)
1000 			cmp_res = bwscoll(s1, s2, 0);
1001 
1002 		return cmp_res;
1003 	}
1004 }
1005 
1006 /*
1007  * Implements version sort (-V).
1008  */
1009 static int
1010 versioncoll(struct key_value *kv1, struct key_value *kv2,
1011     size_t offset __unused)
1012 {
1013 	struct bwstring *s1, *s2;
1014 
1015 	s1 = kv1->k;
1016 	s2 = kv2->k;
1017 
1018 	if (debug_sort) {
1019 		bwsprintf(stdout, s1, "; k1=<", ">");
1020 		bwsprintf(stdout, s2, ", k2=<", ">");
1021 	}
1022 
1023 	if (s1 == s2)
1024 		return 0;
1025 
1026 	return vcmp(s1, s2);
1027 }
1028 
1029 /*
1030  * Check for minus infinity
1031  */
1032 static inline bool
1033 huge_minus(double d, int err1)
1034 {
1035 	if (err1 == ERANGE)
1036 		if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1037 			return 1;
1038 
1039 	return 0;
1040 }
1041 
1042 /*
1043  * Check for plus infinity
1044  */
1045 static inline bool
1046 huge_plus(double d, int err1)
1047 {
1048 	if (err1 == ERANGE)
1049 		if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1050 			return 1;
1051 
1052 	return 0;
1053 }
1054 
1055 /*
1056  * Check whether a function is a NAN
1057  */
1058 static bool
1059 is_nan(double d)
1060 {
1061 #if defined(NAN)
1062 	return (d == NAN || isnan(d));
1063 #else
1064 	return (isnan(d));
1065 #endif
1066 }
1067 
1068 /*
1069  * Compare two NANs
1070  */
1071 static int
1072 cmp_nans(double d1, double d2)
1073 {
1074 	if (d1 == d2)
1075 		return 0;
1076 	return d1 < d2 ? -1 : 1;
1077 }
1078 
1079 /*
1080  * Implements general numeric sort (-g).
1081  */
1082 static int
1083 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1084     size_t offset __unused)
1085 {
1086 	double d1, d2;
1087 	int err1, err2;
1088 	bool empty1, empty2, key1_read, key2_read;
1089 
1090 	d1 = d2 = 0;
1091 	err1 = err2 = 0;
1092 	key1_read = key2_read = false;
1093 
1094 	if (debug_sort) {
1095 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1096 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1097 	}
1098 
1099 	if (kv1->hint->status == HS_UNINITIALIZED) {
1100 		errno = 0;
1101 		d1 = bwstod(kv1->k, &empty1);
1102 		err1 = errno;
1103 
1104 		if (empty1) {
1105 			kv1->hint->v.gh.notnum = true;
1106 			kv1->hint->status = HS_INITIALIZED;
1107 		} else if (err1 == 0) {
1108 			kv1->hint->v.gh.d = d1;
1109 			kv1->hint->v.gh.nan = is_nan(d1);
1110 			kv1->hint->status = HS_INITIALIZED;
1111 		} else
1112 			kv1->hint->status = HS_ERROR;
1113 
1114 		key1_read = true;
1115 	}
1116 
1117 	if (kv2->hint->status == HS_UNINITIALIZED) {
1118 		errno = 0;
1119 		d2 = bwstod(kv2->k, &empty2);
1120 		err2 = errno;
1121 
1122 		if (empty2) {
1123 			kv2->hint->v.gh.notnum = true;
1124 			kv2->hint->status = HS_INITIALIZED;
1125 		} else if (err2 == 0) {
1126 			kv2->hint->v.gh.d = d2;
1127 			kv2->hint->v.gh.nan = is_nan(d2);
1128 			kv2->hint->status = HS_INITIALIZED;
1129 		} else
1130 			kv2->hint->status = HS_ERROR;
1131 
1132 		key2_read = true;
1133 	}
1134 
1135 	if (kv1->hint->status == HS_INITIALIZED &&
1136 	    kv2->hint->status == HS_INITIALIZED) {
1137 #ifdef GNUSORT_COMPATIBILITY
1138 		if (kv1->hint->v.gh.notnum)
1139 			return kv2->hint->v.gh.notnum ? 0 : -1;
1140 		else if (kv2->hint->v.gh.notnum)
1141 			return 1;
1142 #else
1143 		if (kv1->hint->v.gh.notnum && kv2->hint->v.gh.notnum)
1144 			return 0;
1145 #endif
1146 
1147 		if (kv1->hint->v.gh.nan)
1148 			return kv2->hint->v.gh.nan ?
1149 			    cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1;
1150 		else if (kv2->hint->v.gh.nan)
1151 			return 1;
1152 
1153 		d1 = kv1->hint->v.gh.d;
1154 		d2 = kv2->hint->v.gh.d;
1155 
1156 		if (d1 < d2)
1157 			return -1;
1158 		else if (d1 > d2)
1159 			return 1;
1160 		else
1161 			return 0;
1162 	}
1163 
1164 	if (!key1_read) {
1165 		errno = 0;
1166 		d1 = bwstod(kv1->k, &empty1);
1167 		err1 = errno;
1168 	}
1169 
1170 	if (!key2_read) {
1171 		errno = 0;
1172 		d2 = bwstod(kv2->k, &empty2);
1173 		err2 = errno;
1174 	}
1175 
1176 	/* Non-value case */
1177 #ifdef GNUSORT_COMPATIBILITY
1178 	if (empty1)
1179 		return empty2 ? 0 : -1;
1180 	else if (empty2)
1181 		return 1;
1182 #else
1183 	if (empty1 && empty2)
1184 		return 0;
1185 #endif
1186 
1187 	/* NAN case */
1188 	if (is_nan(d1))
1189 		return is_nan(d2) ? cmp_nans(d1, d2) : -1;
1190 	else if (is_nan(d2))
1191 		return 1;
1192 
1193 	/* Infinities */
1194 	if (err1 == ERANGE || err2 == ERANGE) {
1195 		/* Minus infinity case */
1196 		if (huge_minus(d1, err1)) {
1197 			if (huge_minus(d2, err2)) {
1198 				if (d1 == d2)
1199 					return 0;
1200 				return d1 < d2 ? -1 : 1;
1201 			} else
1202 				return -1;
1203 
1204 		} else if (huge_minus(d2, err2)) {
1205 			if (huge_minus(d1, err1)) {
1206 				if (d1 == d2)
1207 					return 0;
1208 				return d1 < d2 ? -1 : 1;
1209 			} else
1210 				return 1;
1211 		}
1212 
1213 		/* Plus infinity case */
1214 		if (huge_plus(d1, err1)) {
1215 			if (huge_plus(d2, err2)) {
1216 				if (d1 == d2)
1217 					return 0;
1218 				return d1 < d2 ? -1 : 1;
1219 			} else
1220 				return 1;
1221 		} else if (huge_plus(d2, err2)) {
1222 			if (huge_plus(d1, err1)) {
1223 				if (d1 == d2)
1224 					return 0;
1225 				return d1 < d2 ? -1 : 1;
1226 			} else
1227 				return -1;
1228 		}
1229 	}
1230 
1231 	if (d1 == d2)
1232 		return 0;
1233 	return d1 < d2 ? -1 : 1;
1234 }
1235 
1236 /*
1237  * Implements month sort (-M).
1238  */
1239 static int
1240 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1241 {
1242 	int val1, val2;
1243 	bool key1_read, key2_read;
1244 
1245 	val1 = val2 = 0;
1246 	key1_read = key2_read = false;
1247 
1248 	if (debug_sort) {
1249 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1250 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1251 	}
1252 
1253 	if (kv1->hint->status == HS_UNINITIALIZED) {
1254 		kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1255 		key1_read = true;
1256 		kv1->hint->status = HS_INITIALIZED;
1257 	}
1258 
1259 	if (kv2->hint->status == HS_UNINITIALIZED) {
1260 		kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1261 		key2_read = true;
1262 		kv2->hint->status = HS_INITIALIZED;
1263 	}
1264 
1265 	if (kv1->hint->status == HS_INITIALIZED) {
1266 		val1 = kv1->hint->v.Mh.m;
1267 		key1_read = true;
1268 	}
1269 
1270 	if (kv2->hint->status == HS_INITIALIZED) {
1271 		val2 = kv2->hint->v.Mh.m;
1272 		key2_read = true;
1273 	}
1274 
1275 	if (!key1_read)
1276 		val1 = bws_month_score(kv1->k);
1277 	if (!key2_read)
1278 		val2 = bws_month_score(kv2->k);
1279 
1280 	if (val1 == val2)
1281 		return 0;
1282 	return val1 < val2 ? -1 : 1;
1283 }
1284