1 /* $OpenBSD: bwstring.c,v 1.7 2015/04/01 22:38:08 millert Exp $ */ 2 3 /*- 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <ctype.h> 31 #include <errno.h> 32 #include <err.h> 33 #include <langinfo.h> 34 #include <math.h> 35 #include <stdlib.h> 36 #include <string.h> 37 #include <wchar.h> 38 #include <wctype.h> 39 40 #include "bwstring.h" 41 #include "sort.h" 42 43 bool byte_sort; 44 size_t sort_mb_cur_max = 1; 45 46 static wchar_t **wmonths; 47 static char **cmonths; 48 49 /* initialise months */ 50 51 void 52 initialise_months(void) 53 { 54 const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4, 55 ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10, 56 ABMON_11, ABMON_12 }; 57 char *tmp; 58 size_t len; 59 60 if (sort_mb_cur_max == 1) { 61 if (cmonths == NULL) { 62 char *m; 63 unsigned int j; 64 int i; 65 66 cmonths = sort_malloc(sizeof(char *) * 12); 67 for (i = 0; i < 12; i++) { 68 cmonths[i] = NULL; 69 tmp = nl_langinfo(item[i]); 70 if (debug_sort) 71 printf("month[%d]=%s\n", i, tmp); 72 if (*tmp == '\0') 73 continue; 74 m = sort_strdup(tmp); 75 len = strlen(tmp); 76 for (j = 0; j < len; j++) 77 m[j] = toupper(m[j]); 78 cmonths[i] = m; 79 } 80 } 81 } else { 82 if (wmonths == NULL) { 83 unsigned int j; 84 wchar_t *m; 85 int i; 86 87 wmonths = sort_malloc(sizeof(wchar_t *) * 12); 88 for (i = 0; i < 12; i++) { 89 wmonths[i] = NULL; 90 tmp = nl_langinfo(item[i]); 91 if (debug_sort) 92 printf("month[%d]=%s\n", i, tmp); 93 if (*tmp == '\0') 94 continue; 95 len = strlen(tmp); 96 m = sort_reallocarray(NULL, len + 1, 97 sizeof(wchar_t)); 98 if (mbstowcs(m, tmp, len) == (size_t)-1) { 99 sort_free(m); 100 continue; 101 } 102 m[len] = L'\0'; 103 for (j = 0; j < len; j++) 104 m[j] = towupper(m[j]); 105 wmonths[i] = m; 106 } 107 } 108 } 109 } 110 111 /* 112 * Compare two wide-character strings 113 */ 114 static int 115 wide_str_coll(const wchar_t *s1, const wchar_t *s2) 116 { 117 int ret = 0; 118 119 errno = 0; 120 ret = wcscoll(s1, s2); 121 if (errno == EILSEQ) { 122 errno = 0; 123 ret = wcscmp(s1, s2); 124 if (errno != 0) { 125 size_t i; 126 for (i = 0; ; ++i) { 127 wchar_t c1 = s1[i]; 128 wchar_t c2 = s2[i]; 129 if (c1 == L'\0') 130 return (c2 == L'\0') ? 0 : -1; 131 if (c2 == L'\0') 132 return 1; 133 if (c1 == c2) 134 continue; 135 return (int)c1 - (int)c2; 136 } 137 } 138 } 139 return ret; 140 } 141 142 /* counterparts of wcs functions */ 143 144 void 145 bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix) 146 { 147 if (sort_mb_cur_max == 1) 148 fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix); 149 else 150 fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix); 151 } 152 153 const void * 154 bwsrawdata(const struct bwstring *bws) 155 { 156 return &(bws->data); 157 } 158 159 size_t 160 bwsrawlen(const struct bwstring *bws) 161 { 162 return (sort_mb_cur_max == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len); 163 } 164 165 size_t 166 bws_memsize(const struct bwstring *bws) 167 { 168 return (sort_mb_cur_max == 1) ? (bws->len + 2 + sizeof(struct bwstring)) : 169 (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring)); 170 } 171 172 void 173 bws_setlen(struct bwstring *bws, size_t newlen) 174 { 175 if (bws && newlen != bws->len && newlen <= bws->len) { 176 bws->len = newlen; 177 if (sort_mb_cur_max == 1) 178 bws->data.cstr[newlen] = '\0'; 179 else 180 bws->data.wstr[newlen] = L'\0'; 181 } 182 } 183 184 /* 185 * Allocate a new binary string of specified size 186 */ 187 struct bwstring * 188 bwsalloc(size_t sz) 189 { 190 struct bwstring *ret; 191 192 if (sort_mb_cur_max == 1) { 193 ret = sort_malloc(sizeof(struct bwstring) + 1 + sz); 194 ret->data.cstr[sz] = '\0'; 195 } else { 196 ret = sort_malloc(sizeof(struct bwstring) + 197 SIZEOF_WCHAR_STRING(sz + 1)); 198 ret->data.wstr[sz] = L'\0'; 199 } 200 ret->len = sz; 201 202 return ret; 203 } 204 205 /* 206 * Create a copy of binary string. 207 * New string size equals the length of the old string. 208 */ 209 struct bwstring * 210 bwsdup(const struct bwstring *s) 211 { 212 struct bwstring *ret; 213 214 if (s == NULL) 215 return NULL; 216 217 ret = bwsalloc(s->len); 218 219 if (sort_mb_cur_max == 1) 220 memcpy(ret->data.cstr, s->data.cstr, s->len); 221 else 222 memcpy(ret->data.wstr, s->data.wstr, 223 SIZEOF_WCHAR_STRING(s->len)); 224 225 return ret; 226 } 227 228 /* 229 * Create a new binary string from a wide character buffer. 230 */ 231 struct bwstring * 232 bwssbdup(const wchar_t *str, size_t len) 233 { 234 if (str == NULL) 235 return (len == 0) ? bwsalloc(0) : NULL; 236 else { 237 struct bwstring *ret; 238 size_t i; 239 240 ret = bwsalloc(len); 241 242 if (sort_mb_cur_max == 1) 243 for (i = 0; i < len; ++i) 244 ret->data.cstr[i] = (unsigned char) str[i]; 245 else 246 memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len)); 247 248 return ret; 249 } 250 } 251 252 /* 253 * Create a new binary string from a raw binary buffer. 254 */ 255 struct bwstring * 256 bwscsbdup(const unsigned char *str, size_t len) 257 { 258 struct bwstring *ret; 259 260 ret = bwsalloc(len); 261 262 if (str) { 263 if (sort_mb_cur_max == 1) 264 memcpy(ret->data.cstr, str, len); 265 else { 266 mbstate_t mbs; 267 const char *s; 268 size_t charlen, chars, cptr; 269 270 chars = 0; 271 cptr = 0; 272 s = (const char *) str; 273 274 memset(&mbs, 0, sizeof(mbs)); 275 276 while (cptr < len) { 277 size_t n = sort_mb_cur_max; 278 279 if (n > len - cptr) 280 n = len - cptr; 281 charlen = mbrlen(s + cptr, n, &mbs); 282 switch (charlen) { 283 case 0: 284 /* FALLTHROUGH */ 285 case (size_t) -1: 286 /* FALLTHROUGH */ 287 case (size_t) -2: 288 ret->data.wstr[chars++] = 289 (unsigned char) s[cptr]; 290 ++cptr; 291 break; 292 default: 293 n = mbrtowc(ret->data.wstr + (chars++), 294 s + cptr, charlen, &mbs); 295 if ((n == (size_t)-1) || (n == (size_t)-2)) 296 /* NOTREACHED */ 297 err(2, "mbrtowc error"); 298 cptr += charlen; 299 }; 300 } 301 302 ret->len = chars; 303 ret->data.wstr[ret->len] = L'\0'; 304 } 305 } 306 return ret; 307 } 308 309 /* 310 * De-allocate object memory 311 */ 312 void 313 bwsfree(struct bwstring *s) 314 { 315 sort_free(s); 316 } 317 318 /* 319 * Copy content of src binary string to dst. 320 * If the capacity of the dst string is not sufficient, 321 * then the data is truncated. 322 */ 323 size_t 324 bwscpy(struct bwstring *dst, const struct bwstring *src) 325 { 326 size_t nums = src->len; 327 328 if (nums > dst->len) 329 nums = dst->len; 330 dst->len = nums; 331 332 if (sort_mb_cur_max == 1) { 333 memcpy(dst->data.cstr, src->data.cstr, nums); 334 dst->data.cstr[dst->len] = '\0'; 335 } else { 336 memcpy(dst->data.wstr, src->data.wstr, 337 SIZEOF_WCHAR_STRING(nums + 1)); 338 dst->data.wstr[dst->len] = L'\0'; 339 } 340 341 return nums; 342 } 343 344 /* 345 * Copy content of src binary string to dst, 346 * with specified number of symbols to be copied. 347 * If the capacity of the dst string is not sufficient, 348 * then the data is truncated. 349 */ 350 struct bwstring * 351 bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size) 352 { 353 size_t nums = src->len; 354 355 if (nums > dst->len) 356 nums = dst->len; 357 if (nums > size) 358 nums = size; 359 dst->len = nums; 360 361 if (sort_mb_cur_max == 1) { 362 memcpy(dst->data.cstr, src->data.cstr, nums); 363 dst->data.cstr[dst->len] = '\0'; 364 } else { 365 memcpy(dst->data.wstr, src->data.wstr, 366 SIZEOF_WCHAR_STRING(nums + 1)); 367 dst->data.wstr[dst->len] = L'\0'; 368 } 369 370 return dst; 371 } 372 373 /* 374 * Copy content of src binary string to dst, 375 * with specified number of symbols to be copied. 376 * An offset value can be specified, from the start of src string. 377 * If the capacity of the dst string is not sufficient, 378 * then the data is truncated. 379 */ 380 struct bwstring * 381 bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset, 382 size_t size) 383 { 384 if (offset >= src->len) { 385 dst->data.wstr[0] = 0; 386 dst->len = 0; 387 } else { 388 size_t nums = src->len - offset; 389 390 if (nums > dst->len) 391 nums = dst->len; 392 if (nums > size) 393 nums = size; 394 dst->len = nums; 395 if (sort_mb_cur_max == 1) { 396 memcpy(dst->data.cstr, src->data.cstr + offset, 397 (nums)); 398 dst->data.cstr[dst->len] = '\0'; 399 } else { 400 memcpy(dst->data.wstr, src->data.wstr + offset, 401 SIZEOF_WCHAR_STRING(nums)); 402 dst->data.wstr[dst->len] = L'\0'; 403 } 404 } 405 return dst; 406 } 407 408 /* 409 * Write binary string to the file. 410 * The output is ended either with '\n' (nl == true) 411 * or '\0' (nl == false). 412 */ 413 size_t 414 bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended) 415 { 416 if (sort_mb_cur_max == 1) { 417 size_t len = bws->len; 418 419 if (!zero_ended) { 420 bws->data.cstr[len] = '\n'; 421 422 if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) 423 err(2, NULL); 424 425 bws->data.cstr[len] = '\0'; 426 } else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) 427 err(2, NULL); 428 429 return len + 1; 430 431 } else { 432 wchar_t eols; 433 size_t printed = 0; 434 435 eols = zero_ended ? btowc('\0') : btowc('\n'); 436 437 while (printed < BWSLEN(bws)) { 438 const wchar_t *s = bws->data.wstr + printed; 439 440 if (*s == L'\0') { 441 int nums; 442 443 nums = fwprintf(f, L"%lc", *s); 444 445 if (nums != 1) 446 err(2, NULL); 447 ++printed; 448 } else { 449 int nums; 450 451 nums = fwprintf(f, L"%ls", s); 452 453 if (nums < 1) 454 err(2, NULL); 455 printed += nums; 456 } 457 } 458 fwprintf(f, L"%lc", eols); 459 return printed + 1; 460 } 461 } 462 463 /* 464 * Allocate and read a binary string from file. 465 * The strings are nl-ended or zero-ended, depending on the sort setting. 466 */ 467 struct bwstring * 468 bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb) 469 { 470 wint_t eols; 471 472 eols = zero_ended ? btowc('\0') : btowc('\n'); 473 474 if (!zero_ended && (sort_mb_cur_max > 1)) { 475 wchar_t *ret; 476 477 ret = fgetwln(f, len); 478 479 if (ret == NULL) { 480 if (!feof(f)) 481 err(2, NULL); 482 return NULL; 483 } 484 if (*len > 0) { 485 if (ret[*len - 1] == (wchar_t)eols) 486 --(*len); 487 } 488 return bwssbdup(ret, *len); 489 490 } else if (!zero_ended && (sort_mb_cur_max == 1)) { 491 char *ret; 492 493 ret = fgetln(f, len); 494 495 if (ret == NULL) { 496 if (!feof(f)) 497 err(2, NULL); 498 return NULL; 499 } 500 if (*len > 0) { 501 if (ret[*len - 1] == '\n') 502 --(*len); 503 } 504 return bwscsbdup((unsigned char *)ret, *len); 505 506 } else { 507 *len = 0; 508 509 if (feof(f)) 510 return NULL; 511 512 if (2 >= rb->fgetwln_z_buffer_size) { 513 rb->fgetwln_z_buffer_size += 256; 514 rb->fgetwln_z_buffer = 515 sort_reallocarray(rb->fgetwln_z_buffer, 516 rb->fgetwln_z_buffer_size, sizeof(wchar_t)); 517 } 518 rb->fgetwln_z_buffer[*len] = 0; 519 520 if (sort_mb_cur_max == 1) { 521 while (!feof(f)) { 522 int c; 523 524 c = fgetc(f); 525 526 if (c == EOF) { 527 if (*len == 0) 528 return NULL; 529 goto line_read_done; 530 } 531 if (c == eols) 532 goto line_read_done; 533 534 if (*len + 1 >= rb->fgetwln_z_buffer_size) { 535 rb->fgetwln_z_buffer_size += 256; 536 rb->fgetwln_z_buffer = 537 sort_reallocarray(rb->fgetwln_z_buffer, 538 rb->fgetwln_z_buffer_size, sizeof(wchar_t)); 539 } 540 541 rb->fgetwln_z_buffer[*len] = c; 542 rb->fgetwln_z_buffer[++(*len)] = 0; 543 } 544 } else { 545 while (!feof(f)) { 546 wint_t c = 0; 547 548 c = fgetwc(f); 549 550 if (c == WEOF) { 551 if (*len == 0) 552 return NULL; 553 goto line_read_done; 554 } 555 if (c == eols) 556 goto line_read_done; 557 558 if (*len + 1 >= rb->fgetwln_z_buffer_size) { 559 rb->fgetwln_z_buffer_size += 256; 560 rb->fgetwln_z_buffer = 561 sort_reallocarray(rb->fgetwln_z_buffer, 562 rb->fgetwln_z_buffer_size, sizeof(wchar_t)); 563 } 564 565 rb->fgetwln_z_buffer[*len] = c; 566 rb->fgetwln_z_buffer[++(*len)] = 0; 567 } 568 } 569 570 line_read_done: 571 /* we do not count the last 0 */ 572 return bwssbdup(rb->fgetwln_z_buffer, *len); 573 } 574 } 575 576 int 577 bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2, 578 size_t offset, size_t len) 579 { 580 size_t cmp_len, len1, len2; 581 int res = 0; 582 583 len1 = bws1->len; 584 len2 = bws2->len; 585 586 if (len1 <= offset) { 587 return (len2 <= offset) ? 0 : -1; 588 } else { 589 if (len2 <= offset) 590 return 1; 591 else { 592 len1 -= offset; 593 len2 -= offset; 594 595 cmp_len = len1; 596 597 if (len2 < cmp_len) 598 cmp_len = len2; 599 600 if (len < cmp_len) 601 cmp_len = len; 602 603 if (sort_mb_cur_max == 1) { 604 const unsigned char *s1, *s2; 605 606 s1 = bws1->data.cstr + offset; 607 s2 = bws2->data.cstr + offset; 608 609 res = memcmp(s1, s2, cmp_len); 610 611 } else { 612 const wchar_t *s1, *s2; 613 614 s1 = bws1->data.wstr + offset; 615 s2 = bws2->data.wstr + offset; 616 617 res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len)); 618 } 619 } 620 } 621 622 if (res == 0) { 623 if (len1 < cmp_len && len1 < len2) 624 res = -1; 625 else if (len2 < cmp_len && len2 < len1) 626 res = +1; 627 } 628 629 return res; 630 } 631 632 int 633 bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) 634 { 635 size_t len1, len2, cmp_len; 636 int res; 637 638 len1 = bws1->len; 639 len2 = bws2->len; 640 641 len1 -= offset; 642 len2 -= offset; 643 644 cmp_len = len1; 645 646 if (len2 < cmp_len) 647 cmp_len = len2; 648 649 res = bwsncmp(bws1, bws2, offset, cmp_len); 650 651 if (res == 0) { 652 if (len1 < len2) 653 res = -1; 654 else if (len2 < len1) 655 res = +1; 656 } 657 658 return res; 659 } 660 661 int 662 bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len) 663 { 664 wchar_t c1, c2; 665 size_t i = 0; 666 667 for (i = 0; i < len; ++i) { 668 c1 = bws_get_iter_value(iter1); 669 c2 = bws_get_iter_value(iter2); 670 if (c1 != c2) 671 return c1 - c2; 672 iter1 = bws_iterator_inc(iter1, 1); 673 iter2 = bws_iterator_inc(iter2, 1); 674 } 675 676 return 0; 677 } 678 679 int 680 bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) 681 { 682 size_t len1, len2; 683 684 len1 = bws1->len; 685 len2 = bws2->len; 686 687 if (len1 <= offset) 688 return (len2 <= offset) ? 0 : -1; 689 else { 690 if (len2 <= offset) 691 return 1; 692 else { 693 len1 -= offset; 694 len2 -= offset; 695 696 if (sort_mb_cur_max == 1) { 697 const unsigned char *s1, *s2; 698 699 s1 = bws1->data.cstr + offset; 700 s2 = bws2->data.cstr + offset; 701 702 if (byte_sort) { 703 int res = 0; 704 705 if (len1 > len2) { 706 res = memcmp(s1, s2, len2); 707 if (!res) 708 res = +1; 709 } else if (len1 < len2) { 710 res = memcmp(s1, s2, len1); 711 if (!res) 712 res = -1; 713 } else 714 res = memcmp(s1, s2, len1); 715 716 return res; 717 718 } else { 719 int res = 0; 720 size_t i, maxlen; 721 722 i = 0; 723 maxlen = len1; 724 725 if (maxlen > len2) 726 maxlen = len2; 727 728 while (i < maxlen) { 729 /* goto next non-zero part: */ 730 while ((i < maxlen) && 731 !s1[i] && !s2[i]) 732 ++i; 733 734 if (i >= maxlen) 735 break; 736 737 if (s1[i] == 0) { 738 if (s2[i] == 0) 739 /* NOTREACHED */ 740 err(2, "bwscoll error 01"); 741 else 742 return -1; 743 } else if (s2[i] == 0) 744 return 1; 745 746 res = strcoll((const char *)(s1 + i), (const char *)(s2 + i)); 747 if (res) 748 return res; 749 750 while ((i < maxlen) && 751 s1[i] && s2[i]) 752 ++i; 753 754 if (i >= maxlen) 755 break; 756 757 if (s1[i] == 0) { 758 if (s2[i] == 0) { 759 ++i; 760 continue; 761 } else 762 return -1; 763 } else if (s2[i] == 0) 764 return 1; 765 else 766 /* NOTREACHED */ 767 err(2, "bwscoll error 02"); 768 } 769 770 if (len1 < len2) 771 return -1; 772 else if (len1 > len2) 773 return 1; 774 775 return 0; 776 } 777 } else { 778 const wchar_t *s1, *s2; 779 size_t i, maxlen; 780 int res = 0; 781 782 s1 = bws1->data.wstr + offset; 783 s2 = bws2->data.wstr + offset; 784 785 i = 0; 786 maxlen = len1; 787 788 if (maxlen > len2) 789 maxlen = len2; 790 791 while (i < maxlen) { 792 793 /* goto next non-zero part: */ 794 while ((i < maxlen) && 795 !s1[i] && !s2[i]) 796 ++i; 797 798 if (i >= maxlen) 799 break; 800 801 if (s1[i] == 0) { 802 if (s2[i] == 0) 803 /* NOTREACHED */ 804 err(2, "bwscoll error 1"); 805 else 806 return -1; 807 } else if (s2[i] == 0) 808 return 1; 809 810 res = wide_str_coll(s1 + i, s2 + i); 811 if (res) 812 return res; 813 814 while ((i < maxlen) && s1[i] && s2[i]) 815 ++i; 816 817 if (i >= maxlen) 818 break; 819 820 if (s1[i] == 0) { 821 if (s2[i] == 0) { 822 ++i; 823 continue; 824 } else 825 return -1; 826 } else if (s2[i] == 0) 827 return 1; 828 else 829 /* NOTREACHED */ 830 err(2, "bwscoll error 2"); 831 } 832 833 if (len1 == len2) 834 return 0; 835 return len1 < len2 ? -1 : 1; 836 } 837 } 838 } 839 } 840 841 /* 842 * Correction of the system API 843 */ 844 double 845 bwstod(struct bwstring *s0, bool *empty) 846 { 847 double ret = 0; 848 849 if (sort_mb_cur_max == 1) { 850 char *ep, *end, *s; 851 852 s = (char *)s0->data.cstr; 853 end = s + s0->len; 854 ep = NULL; 855 856 while (isblank((unsigned char)*s) && s < end) 857 ++s; 858 859 if (!isprint((unsigned char)*s)) { 860 *empty = true; 861 return 0; 862 } 863 864 ret = strtod(s, &ep); 865 if (ep == s) { 866 *empty = true; 867 return 0; 868 } 869 } else { 870 wchar_t *end, *ep, *s; 871 872 s = s0->data.wstr; 873 end = s + s0->len; 874 ep = NULL; 875 876 while (iswblank(*s) && s < end) 877 ++s; 878 879 if (!iswprint(*s)) { 880 *empty = true; 881 return 0; 882 } 883 884 ret = wcstod(s, &ep); 885 if (ep == s) { 886 *empty = true; 887 return 0; 888 } 889 } 890 891 *empty = false; 892 return ret; 893 } 894 895 /* 896 * A helper function for monthcoll. If a line matches 897 * a month name, it returns (number of the month - 1), 898 * while if there is no match, it just return -1. 899 */ 900 int 901 bws_month_score(const struct bwstring *s0) 902 { 903 if (sort_mb_cur_max == 1) { 904 const char *end, *s; 905 int i; 906 907 s = (char *)s0->data.cstr; 908 end = s + s0->len; 909 910 while (isblank((unsigned char)*s) && s < end) 911 ++s; 912 913 for (i = 11; i >= 0; --i) { 914 if (cmonths[i] && 915 (s == strstr(s, cmonths[i]))) 916 return i; 917 } 918 } else { 919 const wchar_t *end, *s; 920 int i; 921 922 s = s0->data.wstr; 923 end = s + s0->len; 924 925 while (iswblank(*s) && s < end) 926 ++s; 927 928 for (i = 11; i >= 0; --i) { 929 if (wmonths[i] && (s == wcsstr(s, wmonths[i]))) 930 return i; 931 } 932 } 933 934 return -1; 935 } 936 937 /* 938 * Rips out leading blanks (-b). 939 */ 940 struct bwstring * 941 ignore_leading_blanks(struct bwstring *str) 942 { 943 if (sort_mb_cur_max == 1) { 944 unsigned char *dst, *end, *src; 945 946 src = str->data.cstr; 947 dst = src; 948 end = src + str->len; 949 950 while (src < end && isblank(*src)) 951 ++src; 952 953 if (src != dst) { 954 size_t newlen; 955 956 newlen = BWSLEN(str) - (src - dst); 957 958 while (src < end) { 959 *dst = *src; 960 ++dst; 961 ++src; 962 } 963 bws_setlen(str, newlen); 964 } 965 } else { 966 wchar_t *dst, *end, *src; 967 968 src = str->data.wstr; 969 dst = src; 970 end = src + str->len; 971 972 while (src < end && iswblank(*src)) 973 ++src; 974 975 if (src != dst) { 976 977 size_t newlen = BWSLEN(str) - (src - dst); 978 979 while (src < end) { 980 *dst = *src; 981 ++dst; 982 ++src; 983 } 984 bws_setlen(str, newlen); 985 986 } 987 } 988 return str; 989 } 990 991 /* 992 * Rips out nonprinting characters (-i). 993 */ 994 struct bwstring * 995 ignore_nonprinting(struct bwstring *str) 996 { 997 size_t newlen = str->len; 998 999 if (sort_mb_cur_max == 1) { 1000 unsigned char *dst, *end, *src; 1001 unsigned char c; 1002 1003 src = str->data.cstr; 1004 dst = src; 1005 end = src + str->len; 1006 1007 while (src < end) { 1008 c = *src; 1009 if (isprint(c)) { 1010 *dst = c; 1011 ++dst; 1012 ++src; 1013 } else { 1014 ++src; 1015 --newlen; 1016 } 1017 } 1018 } else { 1019 wchar_t *dst, *end, *src; 1020 wchar_t c; 1021 1022 src = str->data.wstr; 1023 dst = src; 1024 end = src + str->len; 1025 1026 while (src < end) { 1027 c = *src; 1028 if (iswprint(c)) { 1029 *dst = c; 1030 ++dst; 1031 ++src; 1032 } else { 1033 ++src; 1034 --newlen; 1035 } 1036 } 1037 } 1038 bws_setlen(str, newlen); 1039 1040 return str; 1041 } 1042 1043 /* 1044 * Rips out any characters that are not alphanumeric characters 1045 * nor blanks (-d). 1046 */ 1047 struct bwstring * 1048 dictionary_order(struct bwstring *str) 1049 { 1050 size_t newlen = str->len; 1051 1052 if (sort_mb_cur_max == 1) { 1053 unsigned char *dst, *end, *src; 1054 unsigned char c; 1055 1056 src = str->data.cstr; 1057 dst = src; 1058 end = src + str->len; 1059 1060 while (src < end) { 1061 c = *src; 1062 if (isalnum(c) || isblank(c)) { 1063 *dst = c; 1064 ++dst; 1065 ++src; 1066 } else { 1067 ++src; 1068 --newlen; 1069 } 1070 } 1071 } else { 1072 wchar_t *dst, *end, *src; 1073 wchar_t c; 1074 1075 src = str->data.wstr; 1076 dst = src; 1077 end = src + str->len; 1078 1079 while (src < end) { 1080 c = *src; 1081 if (iswalnum(c) || iswblank(c)) { 1082 *dst = c; 1083 ++dst; 1084 ++src; 1085 } else { 1086 ++src; 1087 --newlen; 1088 } 1089 } 1090 } 1091 bws_setlen(str, newlen); 1092 1093 return str; 1094 } 1095 1096 /* 1097 * Converts string to lower case(-f). 1098 */ 1099 struct bwstring * 1100 ignore_case(struct bwstring *str) 1101 { 1102 if (sort_mb_cur_max == 1) { 1103 unsigned char *end, *s; 1104 1105 s = str->data.cstr; 1106 end = s + str->len; 1107 1108 while (s < end) { 1109 *s = toupper(*s); 1110 ++s; 1111 } 1112 } else { 1113 wchar_t *end, *s; 1114 1115 s = str->data.wstr; 1116 end = s + str->len; 1117 1118 while (s < end) { 1119 *s = towupper(*s); 1120 ++s; 1121 } 1122 } 1123 return str; 1124 } 1125 1126 void 1127 bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos) 1128 { 1129 if (sort_mb_cur_max == 1) 1130 warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr); 1131 else 1132 warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr); 1133 } 1134