1 /* $OpenBSD: coll.c,v 1.11 2015/12/11 21:41:51 mmcc Exp $ */ 2 3 /*- 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/types.h> 31 32 #include <errno.h> 33 #include <err.h> 34 #include <langinfo.h> 35 #include <limits.h> 36 #include <math.h> 37 #include <md5.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <wchar.h> 41 #include <wctype.h> 42 43 #include "coll.h" 44 #include "vsort.h" 45 46 struct key_specs *keys; 47 size_t keys_num = 0; 48 49 wint_t symbol_decimal_point = L'.'; 50 /* there is no default thousands separator in collate rules: */ 51 wint_t symbol_thousands_sep = 0; 52 wint_t symbol_negative_sign = L'-'; 53 wint_t symbol_positive_sign = L'+'; 54 55 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); 56 static int gnumcoll(struct key_value*, struct key_value *, size_t offset); 57 static int monthcoll(struct key_value*, struct key_value *, size_t offset); 58 static int numcoll(struct key_value*, struct key_value *, size_t offset); 59 static int hnumcoll(struct key_value*, struct key_value *, size_t offset); 60 static int randomcoll(struct key_value*, struct key_value *, size_t offset); 61 static int versioncoll(struct key_value*, struct key_value *, size_t offset); 62 63 /* 64 * Allocate keys array 65 */ 66 struct keys_array * 67 keys_array_alloc(void) 68 { 69 struct keys_array *ka; 70 size_t sz; 71 72 sz = keys_array_size(); 73 ka = sort_calloc(1, sz); 74 75 return ka; 76 } 77 78 /* 79 * Calculate whether we need key hint space 80 */ 81 static size_t 82 key_hint_size(void) 83 { 84 return need_hint ? sizeof(struct key_hint) : 0; 85 } 86 87 /* 88 * Calculate keys array size 89 */ 90 size_t 91 keys_array_size(void) 92 { 93 return keys_num * (sizeof(struct key_value) + key_hint_size()); 94 } 95 96 /* 97 * Clean data of keys array 98 */ 99 void 100 clean_keys_array(const struct bwstring *s, struct keys_array *ka) 101 { 102 if (ka) { 103 size_t i; 104 105 for (i = 0; i < keys_num; ++i) 106 if (ka->key[i].k && ka->key[i].k != s) 107 bwsfree(ka->key[i].k); 108 memset(ka, 0, keys_array_size()); 109 } 110 } 111 112 /* 113 * Set value of a key in the keys set 114 */ 115 void 116 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) 117 { 118 if (ka && keys_num > ind) { 119 struct key_value *kv; 120 121 kv = &(ka->key[ind]); 122 123 if (kv->k != s) 124 bwsfree(kv->k); 125 kv->k = s; 126 } 127 } 128 129 /* 130 * Initialize a sort list item 131 */ 132 struct sort_list_item * 133 sort_list_item_alloc(void) 134 { 135 struct sort_list_item *si; 136 size_t sz; 137 138 sz = sizeof(struct sort_list_item) + keys_array_size(); 139 si = sort_calloc(1, sz); 140 141 return si; 142 } 143 144 size_t 145 sort_list_item_size(struct sort_list_item *si) 146 { 147 size_t i, ret = 0; 148 149 if (si) { 150 ret = sizeof(struct sort_list_item) + keys_array_size(); 151 if (si->str) 152 ret += bws_memsize(si->str); 153 for (i = 0; i < keys_num; ++i) { 154 struct key_value *kv; 155 156 kv = &(si->ka.key[i]); 157 158 if (kv->k != si->str) 159 ret += bws_memsize(kv->k); 160 } 161 } 162 return ret; 163 } 164 165 /* 166 * Calculate key for a sort list item 167 */ 168 static void 169 sort_list_item_make_key(struct sort_list_item *si) 170 { 171 preproc(si->str, &(si->ka)); 172 } 173 174 /* 175 * Set value of a sort list item. 176 * Return combined string and keys memory size. 177 */ 178 void 179 sort_list_item_set(struct sort_list_item *si, struct bwstring *str) 180 { 181 if (si) { 182 clean_keys_array(si->str, &(si->ka)); 183 if (si->str) { 184 if (si->str == str) { 185 /* we are trying to reset the same string */ 186 return; 187 } else { 188 bwsfree(si->str); 189 si->str = NULL; 190 } 191 } 192 si->str = str; 193 sort_list_item_make_key(si); 194 } 195 } 196 197 /* 198 * De-allocate a sort list item object memory 199 */ 200 void 201 sort_list_item_clean(struct sort_list_item *si) 202 { 203 if (si) { 204 clean_keys_array(si->str, &(si->ka)); 205 if (si->str) { 206 bwsfree(si->str); 207 si->str = NULL; 208 } 209 } 210 } 211 212 /* 213 * Skip columns according to specs 214 */ 215 static size_t 216 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, 217 bool skip_blanks, bool *empty_key) 218 { 219 if (cols < 1) 220 return BWSLEN(s) + 1; 221 222 if (skip_blanks) 223 while (start < BWSLEN(s) && iswblank(BWS_GET(s, start))) 224 ++start; 225 226 while (start < BWSLEN(s) && cols > 1) { 227 --cols; 228 ++start; 229 } 230 231 if (start >= BWSLEN(s)) 232 *empty_key = true; 233 234 return start; 235 } 236 237 /* 238 * Skip fields according to specs 239 */ 240 static size_t 241 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) 242 { 243 if (fields < 2) { 244 if (BWSLEN(s) == 0) 245 *empty_field = true; 246 return 0; 247 } else if (!(sort_opts_vals.tflag)) { 248 size_t cpos = 0; 249 bool pb = true; 250 251 while (cpos < BWSLEN(s)) { 252 bool isblank; 253 254 isblank = iswblank(BWS_GET(s, cpos)); 255 256 if (isblank && !pb) { 257 --fields; 258 if (fields <= 1) 259 return cpos; 260 } 261 pb = isblank; 262 ++cpos; 263 } 264 if (fields > 1) 265 *empty_field = true; 266 return cpos; 267 } else { 268 size_t cpos = 0; 269 270 while (cpos < BWSLEN(s)) { 271 if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) { 272 --fields; 273 if (fields <= 1) 274 return cpos + 1; 275 } 276 ++cpos; 277 } 278 if (fields > 1) 279 *empty_field = true; 280 return cpos; 281 } 282 } 283 284 /* 285 * Find fields start 286 */ 287 static void 288 find_field_start(const struct bwstring *s, struct key_specs *ks, 289 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) 290 { 291 *field_start = skip_fields_to_start(s, ks->f1, empty_field); 292 if (!*empty_field) 293 *key_start = skip_cols_to_start(s, ks->c1, *field_start, 294 ks->pos1b, empty_key); 295 else 296 *empty_key = true; 297 } 298 299 /* 300 * Find end key position 301 */ 302 static size_t 303 find_field_end(const struct bwstring *s, struct key_specs *ks) 304 { 305 size_t f2, next_field_start, pos_end; 306 bool empty_field, empty_key; 307 308 empty_field = false; 309 empty_key = false; 310 f2 = ks->f2; 311 312 if (f2 == 0) 313 return BWSLEN(s) + 1; 314 else { 315 if (ks->c2 == 0) { 316 next_field_start = skip_fields_to_start(s, f2 + 1, 317 &empty_field); 318 if ((next_field_start > 0) && sort_opts_vals.tflag && 319 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s, 320 next_field_start - 1))) 321 --next_field_start; 322 } else 323 next_field_start = skip_fields_to_start(s, f2, 324 &empty_field); 325 } 326 327 if (empty_field || (next_field_start >= BWSLEN(s))) 328 return BWSLEN(s) + 1; 329 330 if (ks->c2) { 331 pos_end = skip_cols_to_start(s, ks->c2, next_field_start, 332 ks->pos2b, &empty_key); 333 if (pos_end < BWSLEN(s)) 334 ++pos_end; 335 } else 336 pos_end = next_field_start; 337 338 return pos_end; 339 } 340 341 /* 342 * Cut a field according to the key specs 343 */ 344 static struct bwstring * 345 cut_field(const struct bwstring *s, struct key_specs *ks) 346 { 347 struct bwstring *ret = NULL; 348 349 if (s && ks) { 350 size_t field_start, key_end, key_start, sz; 351 bool empty_field, empty_key; 352 353 field_start = 0; 354 key_start = 0; 355 empty_field = false; 356 empty_key = false; 357 358 find_field_start(s, ks, &field_start, &key_start, 359 &empty_field, &empty_key); 360 361 if (empty_key) 362 sz = 0; 363 else { 364 key_end = find_field_end(s, ks); 365 sz = (key_end < key_start) ? 0 : (key_end - key_start); 366 } 367 368 ret = bwsalloc(sz); 369 if (sz) 370 bwsnocpy(ret, s, key_start, sz); 371 } else 372 ret = bwsalloc(0); 373 374 return ret; 375 } 376 377 /* 378 * Preprocesses a line applying the necessary transformations 379 * specified by command line options and returns the preprocessed 380 * string, which can be used to compare. 381 */ 382 int 383 preproc(struct bwstring *s, struct keys_array *ka) 384 { 385 if (sort_opts_vals.kflag) { 386 size_t i; 387 for (i = 0; i < keys_num; i++) { 388 struct bwstring *key; 389 struct key_specs *kspecs; 390 struct sort_mods *sm; 391 392 kspecs = &(keys[i]); 393 key = cut_field(s, kspecs); 394 395 sm = &(kspecs->sm); 396 if (sm->dflag) 397 key = dictionary_order(key); 398 else if (sm->iflag) 399 key = ignore_nonprinting(key); 400 if (sm->fflag || sm->Mflag) 401 key = ignore_case(key); 402 403 set_key_on_keys_array(ka, key, i); 404 } 405 } else { 406 struct bwstring *ret = NULL; 407 struct sort_mods *sm = default_sort_mods; 408 409 #ifdef GNUSORT_COMPATIBILITY 410 if (sm->bflag) { 411 if (ret == NULL) 412 ret = bwsdup(s); 413 ret = ignore_leading_blanks(ret); 414 } 415 #endif 416 if (sm->dflag) { 417 if (ret == NULL) 418 ret = bwsdup(s); 419 ret = dictionary_order(ret); 420 } else if (sm->iflag) { 421 if (ret == NULL) 422 ret = bwsdup(s); 423 ret = ignore_nonprinting(ret); 424 } 425 if (sm->fflag || sm->Mflag) { 426 if (ret == NULL) 427 ret = bwsdup(s); 428 ret = ignore_case(ret); 429 } 430 if (ret == NULL) 431 set_key_on_keys_array(ka, s, 0); 432 else 433 set_key_on_keys_array(ka, ret, 0); 434 } 435 436 return 0; 437 } 438 439 cmpcoll_t 440 get_sort_func(struct sort_mods *sm) 441 { 442 if (sm->nflag) 443 return numcoll; 444 else if (sm->hflag) 445 return hnumcoll; 446 else if (sm->gflag) 447 return gnumcoll; 448 else if (sm->Mflag) 449 return monthcoll; 450 else if (sm->Rflag) 451 return randomcoll; 452 else if (sm->Vflag) 453 return versioncoll; 454 else 455 return wstrcoll; 456 } 457 458 /* 459 * Compares the given strings. Returns a positive number if 460 * the first precedes the second, a negative number if the second is 461 * the preceding one, and zero if they are equal. This function calls 462 * the underlying collate functions, which done the actual comparison. 463 */ 464 int 465 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) 466 { 467 struct sort_mods *sm; 468 int res = 0; 469 size_t i; 470 471 for (i = 0; i < keys_num; ++i) { 472 sm = &(keys[i].sm); 473 474 if (sm->rflag) 475 res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset); 476 else 477 res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset); 478 479 if (res) 480 break; 481 482 /* offset applies to only the first key */ 483 offset = 0; 484 } 485 return res; 486 } 487 488 /* 489 * Compare two strings. 490 * Plain symbol-by-symbol comparison. 491 */ 492 int 493 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) 494 { 495 if (default_sort_mods->rflag) { 496 const struct bwstring *tmp; 497 498 tmp = s1; 499 s1 = s2; 500 s2 = tmp; 501 } 502 503 return bwscoll(s1, s2, 0); 504 } 505 506 /* 507 * Compare a string and a sort list item, according to the sort specs. 508 */ 509 int 510 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) 511 { 512 struct keys_array *ka1; 513 int ret = 0; 514 515 ka1 = keys_array_alloc(); 516 517 preproc(str1, ka1); 518 519 sort_list_item_make_key(*ss2); 520 521 if (debug_sort) { 522 bwsprintf(stdout, str1, "; s1=<", ">"); 523 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); 524 } 525 526 ret = key_coll(ka1, &((*ss2)->ka), 0); 527 528 if (debug_sort) 529 printf("; cmp1=%d", ret); 530 531 clean_keys_array(str1, ka1); 532 sort_free(ka1); 533 534 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 535 ret = top_level_str_coll(str1, ((*ss2)->str)); 536 if (debug_sort) 537 printf("; cmp2=%d", ret); 538 } 539 540 if (debug_sort) 541 putchar('\n'); 542 543 return ret; 544 } 545 546 /* 547 * Compare two sort list items, according to the sort specs. 548 */ 549 int 550 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, 551 size_t offset) 552 { 553 int ret; 554 555 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); 556 557 if (debug_sort) { 558 if (offset) 559 printf("; offset=%zu", offset); 560 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); 561 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); 562 printf("; cmp1=%d\n", ret); 563 } 564 565 if (ret) 566 return ret; 567 568 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 569 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 570 if (debug_sort) 571 printf("; cmp2=%d\n", ret); 572 } 573 574 return ret; 575 } 576 577 /* 578 * Compare two sort list items, according to the sort specs. 579 */ 580 int 581 list_coll(const void *ss1, const void *ss2) 582 { 583 return list_coll_offset((struct sort_list_item **)ss1, 584 (struct sort_list_item **)ss2, 0); 585 } 586 587 #define LSCDEF(N) \ 588 static int \ 589 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ 590 { \ 591 \ 592 return list_coll_offset(ss1, ss2, N); \ 593 } 594 595 LSCDEF(0) 596 LSCDEF(1) 597 LSCDEF(2) 598 LSCDEF(3) 599 LSCDEF(4) 600 LSCDEF(5) 601 LSCDEF(6) 602 LSCDEF(7) 603 LSCDEF(8) 604 LSCDEF(9) 605 LSCDEF(10) 606 LSCDEF(11) 607 LSCDEF(12) 608 LSCDEF(13) 609 LSCDEF(14) 610 LSCDEF(15) 611 LSCDEF(16) 612 LSCDEF(17) 613 LSCDEF(18) 614 LSCDEF(19) 615 LSCDEF(20) 616 617 listcoll_t 618 get_list_call_func(size_t offset) 619 { 620 static const listcoll_t lsarray[] = { list_coll_0, list_coll_1, 621 list_coll_2, list_coll_3, list_coll_4, list_coll_5, 622 list_coll_6, list_coll_7, list_coll_8, list_coll_9, 623 list_coll_10, list_coll_11, list_coll_12, list_coll_13, 624 list_coll_14, list_coll_15, list_coll_16, list_coll_17, 625 list_coll_18, list_coll_19, list_coll_20 }; 626 627 if (offset <= 20) 628 return lsarray[offset]; 629 630 return list_coll_0; 631 } 632 633 /* 634 * Compare two sort list items, only by their original string. 635 */ 636 int 637 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) 638 { 639 return top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 640 } 641 642 /* 643 * Maximum size of a number in the string (before or after decimal point) 644 */ 645 #define MAX_NUM_SIZE (128) 646 647 /* 648 * Set suffix value 649 */ 650 static void 651 setsuffix(wchar_t c, unsigned char *si) 652 { 653 switch (c){ 654 case L'k': 655 case L'K': 656 *si = 1; 657 break; 658 case L'M': 659 *si = 2; 660 break; 661 case L'G': 662 *si = 3; 663 break; 664 case L'T': 665 *si = 4; 666 break; 667 case L'P': 668 *si = 5; 669 break; 670 case L'E': 671 *si = 6; 672 break; 673 case L'Z': 674 *si = 7; 675 break; 676 case L'Y': 677 *si = 8; 678 break; 679 default: 680 *si = 0; 681 }; 682 } 683 684 /* 685 * Read string s and parse the string into a fixed-decimal-point number. 686 * sign equals -1 if the number is negative (explicit plus is not allowed, 687 * according to GNU sort's "info sort". 688 * The number part before decimal point is in the smain, after the decimal 689 * point is in sfrac, tail is the pointer to the remainder of the string. 690 */ 691 static int 692 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si) 693 { 694 bwstring_iterator s; 695 696 s = bws_begin(s0); 697 698 /* always end the fraction with zero, even if we have no fraction */ 699 sfrac[0] = 0; 700 701 while (iswblank(bws_get_iter_value(s))) 702 s = bws_iterator_inc(s, 1); 703 704 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) { 705 *sign = -1; 706 s = bws_iterator_inc(s, 1); 707 } 708 709 // This is '0', not '\0', do not change this 710 while (iswdigit(bws_get_iter_value(s)) && 711 (bws_get_iter_value(s) == L'0')) 712 s = bws_iterator_inc(s, 1); 713 714 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { 715 if (iswdigit(bws_get_iter_value(s))) { 716 smain[*main_len] = bws_get_iter_value(s); 717 s = bws_iterator_inc(s, 1); 718 *main_len += 1; 719 } else if (symbol_thousands_sep && 720 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep)) 721 s = bws_iterator_inc(s, 1); 722 else 723 break; 724 } 725 726 smain[*main_len] = 0; 727 728 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) { 729 s = bws_iterator_inc(s, 1); 730 while (iswdigit(bws_get_iter_value(s)) && 731 *frac_len < MAX_NUM_SIZE) { 732 sfrac[*frac_len] = bws_get_iter_value(s); 733 s = bws_iterator_inc(s, 1); 734 *frac_len += 1; 735 } 736 sfrac[*frac_len] = 0; 737 738 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { 739 --(*frac_len); 740 sfrac[*frac_len] = L'\0'; 741 } 742 } 743 744 setsuffix(bws_get_iter_value(s), si); 745 746 if ((*main_len + *frac_len) == 0) 747 *sign = 0; 748 749 return 0; 750 } 751 752 /* 753 * Implements string sort. 754 */ 755 static int 756 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 757 { 758 759 if (debug_sort) { 760 if (offset) 761 printf("; offset=%zu\n", offset); 762 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 763 printf("(%zu)", BWSLEN(kv1->k)); 764 bwsprintf(stdout, kv2->k, ", k2=<", ">"); 765 printf("(%zu)", BWSLEN(kv2->k)); 766 } 767 768 return bwscoll(kv1->k, kv2->k, offset); 769 } 770 771 /* 772 * Compare two suffixes 773 */ 774 static inline int 775 cmpsuffix(unsigned char si1, unsigned char si2) 776 { 777 return (char)si1 - (char)si2; 778 } 779 780 /* 781 * Implements numeric sort for -n and -h. 782 */ 783 static int 784 numcoll_impl(struct key_value *kv1, struct key_value *kv2, 785 size_t offset __unused, bool use_suffix) 786 { 787 struct bwstring *s1, *s2; 788 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; 789 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; 790 int cmp_res, sign1, sign2; 791 size_t frac1, frac2, main1, main2; 792 unsigned char SI1, SI2; 793 bool e1, e2, key1_read, key2_read; 794 795 s1 = kv1->k; 796 s2 = kv2->k; 797 sign1 = sign2 = 0; 798 main1 = main2 = 0; 799 frac1 = frac2 = 0; 800 801 key1_read = key2_read = false; 802 803 if (debug_sort) { 804 bwsprintf(stdout, s1, "; k1=<", ">"); 805 bwsprintf(stdout, s2, ", k2=<", ">"); 806 } 807 808 if (s1 == s2) 809 return 0; 810 811 if (kv1->hint->status == HS_UNINITIALIZED) { 812 /* read the number from the string */ 813 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 814 key1_read = true; 815 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); 816 if (main1 < 1 && frac1 < 1) 817 kv1->hint->v.nh.empty=true; 818 kv1->hint->v.nh.si = SI1; 819 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? 820 HS_INITIALIZED : HS_ERROR; 821 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; 822 } 823 824 if (kv2->hint->status == HS_UNINITIALIZED) { 825 /* read the number from the string */ 826 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 827 key2_read = true; 828 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); 829 if (main2 < 1 && frac2 < 1) 830 kv2->hint->v.nh.empty=true; 831 kv2->hint->v.nh.si = SI2; 832 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? 833 HS_INITIALIZED : HS_ERROR; 834 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; 835 } 836 837 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == 838 HS_INITIALIZED) { 839 unsigned long long n1, n2; 840 bool neg1, neg2; 841 842 e1 = kv1->hint->v.nh.empty; 843 e2 = kv2->hint->v.nh.empty; 844 845 if (e1 && e2) 846 return 0; 847 848 neg1 = kv1->hint->v.nh.neg; 849 neg2 = kv2->hint->v.nh.neg; 850 851 if (neg1 && !neg2) 852 return -1; 853 if (neg2 && !neg1) 854 return 1; 855 856 if (e1) 857 return neg2 ? 1 : -1; 858 else if (e2) 859 return neg1 ? -1 : 1; 860 861 862 if (use_suffix) { 863 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); 864 if (cmp_res) 865 return neg1 ? -cmp_res : cmp_res; 866 } 867 868 n1 = kv1->hint->v.nh.n1; 869 n2 = kv2->hint->v.nh.n1; 870 if (n1 < n2) 871 return neg1 ? 1 : -1; 872 else if (n1 > n2) 873 return neg1 ? -1 : 1; 874 } 875 876 /* read the numbers from the strings */ 877 if (!key1_read) 878 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 879 if (!key2_read) 880 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 881 882 e1 = ((main1 + frac1) == 0); 883 e2 = ((main2 + frac2) == 0); 884 885 if (e1 && e2) 886 return 0; 887 888 /* we know the result if the signs are different */ 889 if (sign1 < 0 && sign2 >= 0) 890 return -1; 891 if (sign1 >= 0 && sign2 < 0) 892 return 1; 893 894 if (e1) 895 return (sign2 < 0) ? +1 : -1; 896 else if (e2) 897 return (sign1 < 0) ? -1 : 1; 898 899 if (use_suffix) { 900 cmp_res = cmpsuffix(SI1, SI2); 901 if (cmp_res) 902 return (sign1 < 0) ? -cmp_res : cmp_res; 903 } 904 905 /* if both numbers are empty assume that the strings are equal */ 906 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) 907 return 0; 908 909 /* 910 * if the main part is of different size, we know the result 911 * (because the leading zeros are removed) 912 */ 913 if (main1 < main2) 914 cmp_res = -1; 915 else if (main1 > main2) 916 cmp_res = +1; 917 /* if the sizes are equal then simple non-collate string compare gives the correct result */ 918 else 919 cmp_res = wcscmp(smain1, smain2); 920 921 /* check fraction */ 922 if (!cmp_res) 923 cmp_res = wcscmp(sfrac1, sfrac2); 924 925 if (!cmp_res) 926 return 0; 927 928 /* reverse result if the signs are negative */ 929 if (sign1 < 0 && sign2 < 0) 930 cmp_res = -cmp_res; 931 932 return cmp_res; 933 } 934 935 /* 936 * Implements numeric sort (-n). 937 */ 938 static int 939 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 940 { 941 return numcoll_impl(kv1, kv2, offset, false); 942 } 943 944 /* 945 * Implements 'human' numeric sort (-h). 946 */ 947 static int 948 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 949 { 950 return numcoll_impl(kv1, kv2, offset, true); 951 } 952 953 /* 954 * Implements random sort (-R). 955 */ 956 static int 957 randomcoll(struct key_value *kv1, struct key_value *kv2, 958 size_t offset __unused) 959 { 960 struct bwstring *s1, *s2; 961 MD5_CTX ctx1, ctx2; 962 char *b1, *b2; 963 964 s1 = kv1->k; 965 s2 = kv2->k; 966 967 if (debug_sort) { 968 bwsprintf(stdout, s1, "; k1=<", ">"); 969 bwsprintf(stdout, s2, ", k2=<", ">"); 970 } 971 972 if (s1 == s2) 973 return 0; 974 975 memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX)); 976 memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX)); 977 978 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); 979 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); 980 b1 = MD5End(&ctx1, NULL); 981 b2 = MD5End(&ctx2, NULL); 982 if (b1 == NULL) { 983 if (b2 == NULL) 984 return 0; 985 else { 986 sort_free(b2); 987 return -1; 988 } 989 } else if (b2 == NULL) { 990 sort_free(b1); 991 return 1; 992 } else { 993 int cmp_res; 994 995 cmp_res = strcmp(b1, b2); 996 sort_free(b1); 997 sort_free(b2); 998 999 if (!cmp_res) 1000 cmp_res = bwscoll(s1, s2, 0); 1001 1002 return cmp_res; 1003 } 1004 } 1005 1006 /* 1007 * Implements version sort (-V). 1008 */ 1009 static int 1010 versioncoll(struct key_value *kv1, struct key_value *kv2, 1011 size_t offset __unused) 1012 { 1013 struct bwstring *s1, *s2; 1014 1015 s1 = kv1->k; 1016 s2 = kv2->k; 1017 1018 if (debug_sort) { 1019 bwsprintf(stdout, s1, "; k1=<", ">"); 1020 bwsprintf(stdout, s2, ", k2=<", ">"); 1021 } 1022 1023 if (s1 == s2) 1024 return 0; 1025 1026 return vcmp(s1, s2); 1027 } 1028 1029 /* 1030 * Check for minus infinity 1031 */ 1032 static inline bool 1033 huge_minus(double d, int err1) 1034 { 1035 if (err1 == ERANGE) 1036 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) 1037 return 1; 1038 1039 return 0; 1040 } 1041 1042 /* 1043 * Check for plus infinity 1044 */ 1045 static inline bool 1046 huge_plus(double d, int err1) 1047 { 1048 if (err1 == ERANGE) 1049 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) 1050 return 1; 1051 1052 return 0; 1053 } 1054 1055 /* 1056 * Check whether a function is a NAN 1057 */ 1058 static bool 1059 is_nan(double d) 1060 { 1061 #if defined(NAN) 1062 return (d == NAN || isnan(d)); 1063 #else 1064 return (isnan(d)); 1065 #endif 1066 } 1067 1068 /* 1069 * Compare two NANs 1070 */ 1071 static int 1072 cmp_nans(double d1, double d2) 1073 { 1074 if (d1 == d2) 1075 return 0; 1076 return d1 < d2 ? -1 : 1; 1077 } 1078 1079 /* 1080 * Implements general numeric sort (-g). 1081 */ 1082 static int 1083 gnumcoll(struct key_value *kv1, struct key_value *kv2, 1084 size_t offset __unused) 1085 { 1086 double d1, d2; 1087 int err1, err2; 1088 bool empty1, empty2, key1_read, key2_read; 1089 1090 d1 = d2 = 0; 1091 err1 = err2 = 0; 1092 key1_read = key2_read = false; 1093 1094 if (debug_sort) { 1095 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1096 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1097 } 1098 1099 if (kv1->hint->status == HS_UNINITIALIZED) { 1100 errno = 0; 1101 d1 = bwstod(kv1->k, &empty1); 1102 err1 = errno; 1103 1104 if (empty1) { 1105 kv1->hint->v.gh.notnum = true; 1106 kv1->hint->status = HS_INITIALIZED; 1107 } else if (err1 == 0) { 1108 kv1->hint->v.gh.d = d1; 1109 kv1->hint->v.gh.nan = is_nan(d1); 1110 kv1->hint->status = HS_INITIALIZED; 1111 } else 1112 kv1->hint->status = HS_ERROR; 1113 1114 key1_read = true; 1115 } 1116 1117 if (kv2->hint->status == HS_UNINITIALIZED) { 1118 errno = 0; 1119 d2 = bwstod(kv2->k, &empty2); 1120 err2 = errno; 1121 1122 if (empty2) { 1123 kv2->hint->v.gh.notnum = true; 1124 kv2->hint->status = HS_INITIALIZED; 1125 } else if (err2 == 0) { 1126 kv2->hint->v.gh.d = d2; 1127 kv2->hint->v.gh.nan = is_nan(d2); 1128 kv2->hint->status = HS_INITIALIZED; 1129 } else 1130 kv2->hint->status = HS_ERROR; 1131 1132 key2_read = true; 1133 } 1134 1135 if (kv1->hint->status == HS_INITIALIZED && 1136 kv2->hint->status == HS_INITIALIZED) { 1137 #ifdef GNUSORT_COMPATIBILITY 1138 if (kv1->hint->v.gh.notnum) 1139 return kv2->hint->v.gh.notnum ? 0 : -1; 1140 else if (kv2->hint->v.gh.notnum) 1141 return 1; 1142 #else 1143 if (kv1->hint->v.gh.notnum && kv2->hint->v.gh.notnum) 1144 return 0; 1145 #endif 1146 1147 if (kv1->hint->v.gh.nan) 1148 return kv2->hint->v.gh.nan ? 1149 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1; 1150 else if (kv2->hint->v.gh.nan) 1151 return 1; 1152 1153 d1 = kv1->hint->v.gh.d; 1154 d2 = kv2->hint->v.gh.d; 1155 1156 if (d1 < d2) 1157 return -1; 1158 else if (d1 > d2) 1159 return 1; 1160 else 1161 return 0; 1162 } 1163 1164 if (!key1_read) { 1165 errno = 0; 1166 d1 = bwstod(kv1->k, &empty1); 1167 err1 = errno; 1168 } 1169 1170 if (!key2_read) { 1171 errno = 0; 1172 d2 = bwstod(kv2->k, &empty2); 1173 err2 = errno; 1174 } 1175 1176 /* Non-value case */ 1177 #ifdef GNUSORT_COMPATIBILITY 1178 if (empty1) 1179 return empty2 ? 0 : -1; 1180 else if (empty2) 1181 return 1; 1182 #else 1183 if (empty1 && empty2) 1184 return 0; 1185 #endif 1186 1187 /* NAN case */ 1188 if (is_nan(d1)) 1189 return is_nan(d2) ? cmp_nans(d1, d2) : -1; 1190 else if (is_nan(d2)) 1191 return 1; 1192 1193 /* Infinities */ 1194 if (err1 == ERANGE || err2 == ERANGE) { 1195 /* Minus infinity case */ 1196 if (huge_minus(d1, err1)) { 1197 if (huge_minus(d2, err2)) { 1198 if (d1 == d2) 1199 return 0; 1200 return d1 < d2 ? -1 : 1; 1201 } else 1202 return -1; 1203 1204 } else if (huge_minus(d2, err2)) { 1205 if (huge_minus(d1, err1)) { 1206 if (d1 == d2) 1207 return 0; 1208 return d1 < d2 ? -1 : 1; 1209 } else 1210 return 1; 1211 } 1212 1213 /* Plus infinity case */ 1214 if (huge_plus(d1, err1)) { 1215 if (huge_plus(d2, err2)) { 1216 if (d1 == d2) 1217 return 0; 1218 return d1 < d2 ? -1 : 1; 1219 } else 1220 return 1; 1221 } else if (huge_plus(d2, err2)) { 1222 if (huge_plus(d1, err1)) { 1223 if (d1 == d2) 1224 return 0; 1225 return d1 < d2 ? -1 : 1; 1226 } else 1227 return -1; 1228 } 1229 } 1230 1231 if (d1 == d2) 1232 return 0; 1233 return d1 < d2 ? -1 : 1; 1234 } 1235 1236 /* 1237 * Implements month sort (-M). 1238 */ 1239 static int 1240 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused) 1241 { 1242 int val1, val2; 1243 bool key1_read, key2_read; 1244 1245 val1 = val2 = 0; 1246 key1_read = key2_read = false; 1247 1248 if (debug_sort) { 1249 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1250 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1251 } 1252 1253 if (kv1->hint->status == HS_UNINITIALIZED) { 1254 kv1->hint->v.Mh.m = bws_month_score(kv1->k); 1255 key1_read = true; 1256 kv1->hint->status = HS_INITIALIZED; 1257 } 1258 1259 if (kv2->hint->status == HS_UNINITIALIZED) { 1260 kv2->hint->v.Mh.m = bws_month_score(kv2->k); 1261 key2_read = true; 1262 kv2->hint->status = HS_INITIALIZED; 1263 } 1264 1265 if (kv1->hint->status == HS_INITIALIZED) { 1266 val1 = kv1->hint->v.Mh.m; 1267 key1_read = true; 1268 } 1269 1270 if (kv2->hint->status == HS_INITIALIZED) { 1271 val2 = kv2->hint->v.Mh.m; 1272 key2_read = true; 1273 } 1274 1275 if (!key1_read) 1276 val1 = bws_month_score(kv1->k); 1277 if (!key2_read) 1278 val2 = bws_month_score(kv2->k); 1279 1280 if (val1 == val2) 1281 return 0; 1282 return val1 < val2 ? -1 : 1; 1283 } 1284