1 /* $OpenBSD: sort.c,v 1.90 2019/06/28 13:35:03 deraadt Exp $ */ 2 3 /*- 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/resource.h> 31 #include <sys/stat.h> 32 #include <sys/sysctl.h> 33 #include <sys/types.h> 34 35 #include <err.h> 36 #include <errno.h> 37 #include <getopt.h> 38 #include <limits.h> 39 #include <md5.h> 40 #include <regex.h> 41 #include <signal.h> 42 #include <stdbool.h> 43 #include <stdint.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <unistd.h> 48 #include <wchar.h> 49 #include <wctype.h> 50 51 #include "coll.h" 52 #include "file.h" 53 #include "sort.h" 54 55 #ifdef GNUSORT_COMPATIBILITY 56 # define PERMUTE "" 57 #else 58 # define PERMUTE "+" 59 #endif 60 #define OPTIONS PERMUTE"bCcdfgHhik:Mmno:RrS:st:T:uVz" 61 62 static bool need_random; 63 static const char *random_source; 64 65 MD5_CTX md5_ctx; 66 67 struct sort_opts sort_opts_vals; 68 69 bool debug_sort; 70 bool need_hint; 71 72 static struct sort_mods default_sort_mods_object; 73 struct sort_mods * const default_sort_mods = &default_sort_mods_object; 74 75 /* 76 * Arguments from file (when file0-from option is used: 77 */ 78 static size_t argc_from_file0 = (size_t)-1; 79 static char **argv_from_file0; 80 81 /* 82 * Placeholder symbols for options which have no single-character equivalent 83 */ 84 enum { 85 SORT_OPT = CHAR_MAX + 1, 86 HELP_OPT, 87 FF_OPT, 88 BS_OPT, 89 VERSION_OPT, 90 DEBUG_OPT, 91 RANDOMSOURCE_OPT, 92 COMPRESSPROGRAM_OPT, 93 QSORT_OPT, 94 HEAPSORT_OPT, 95 RADIXSORT_OPT, 96 MMAP_OPT 97 }; 98 99 #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6 100 static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' }; 101 102 static const struct option long_options[] = { 103 { "batch-size", required_argument, NULL, BS_OPT }, 104 { "buffer-size", required_argument, NULL, 'S' }, 105 { "check", optional_argument, NULL, 'c' }, 106 { "check=silent|quiet", optional_argument, NULL, 'C' }, 107 { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT }, 108 { "debug", no_argument, NULL, DEBUG_OPT }, 109 { "dictionary-order", no_argument, NULL, 'd' }, 110 { "field-separator", required_argument, NULL, 't' }, 111 { "files0-from", required_argument, NULL, FF_OPT }, 112 { "general-numeric-sort", no_argument, NULL, 'g' }, 113 { "heapsort", no_argument, NULL, HEAPSORT_OPT }, 114 { "help", no_argument, NULL, HELP_OPT }, 115 { "human-numeric-sort", no_argument, NULL, 'h' }, 116 { "ignore-leading-blanks", no_argument, NULL, 'b' }, 117 { "ignore-case", no_argument, NULL, 'f' }, 118 { "ignore-nonprinting", no_argument, NULL, 'i' }, 119 { "key", required_argument, NULL, 'k' }, 120 { "merge", no_argument, NULL, 'm' }, 121 { "mergesort", no_argument, NULL, 'H' }, 122 { "mmap", no_argument, NULL, MMAP_OPT }, 123 { "month-sort", no_argument, NULL, 'M' }, 124 { "numeric-sort", no_argument, NULL, 'n' }, 125 { "output", required_argument, NULL, 'o' }, 126 { "qsort", no_argument, NULL, QSORT_OPT }, 127 { "radixsort", no_argument, NULL, RADIXSORT_OPT }, 128 { "random-sort", no_argument, NULL, 'R' }, 129 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT }, 130 { "reverse", no_argument, NULL, 'r' }, 131 { "sort", required_argument, NULL, SORT_OPT }, 132 { "stable", no_argument, NULL, 's' }, 133 { "temporary-directory", required_argument, NULL, 'T' }, 134 { "unique", no_argument, NULL, 'u' }, 135 { "version", no_argument, NULL, VERSION_OPT }, 136 { "version-sort", no_argument, NULL, 'V' }, 137 { "zero-terminated", no_argument, NULL, 'z' }, 138 { NULL, no_argument, NULL, 0 } 139 }; 140 141 /* 142 * Check where sort modifier is present 143 */ 144 static bool 145 sort_modifier_empty(struct sort_mods *sm) 146 { 147 return !(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag || 148 sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag); 149 } 150 151 /* 152 * Print out usage text. 153 */ 154 static __dead void 155 usage(int exit_val) 156 { 157 fprintf(exit_val ? stderr : stdout, 158 "usage: %s [-bCcdfgHhiMmnRrsuVz] [-k field1[,field2]] [-o output] " 159 "[-S size]\n\t[-T dir] [-t char] [file ...]\n", getprogname()); 160 exit(exit_val); 161 } 162 163 /* 164 * Read input file names from a file (file0-from option). 165 */ 166 static void 167 read_fns_from_file0(const char *fn) 168 { 169 FILE *f; 170 char *line = NULL; 171 size_t linesize = 0; 172 ssize_t linelen; 173 174 f = fopen(fn, "r"); 175 if (f == NULL) 176 err(2, "%s", fn); 177 178 while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) { 179 if (*line != '\0') { 180 if (argc_from_file0 == (size_t)-1) 181 argc_from_file0 = 0; 182 ++argc_from_file0; 183 argv_from_file0 = sort_reallocarray(argv_from_file0, 184 argc_from_file0, sizeof(char *)); 185 argv_from_file0[argc_from_file0 - 1] = line; 186 } else { 187 free(line); 188 } 189 line = NULL; 190 linesize = 0; 191 } 192 if (ferror(f)) 193 err(2, "%s: getdelim", fn); 194 195 closefile(f, fn); 196 } 197 198 /* 199 * Check how much RAM is available for the sort. 200 */ 201 static void 202 set_hw_params(void) 203 { 204 unsigned long long free_memory; 205 long long user_memory; 206 struct rlimit rl; 207 size_t len; 208 int mib[] = { CTL_HW, HW_USERMEM64 }; 209 210 /* Get total user (non-kernel) memory. */ 211 len = sizeof(user_memory); 212 if (sysctl(mib, 2, &user_memory, &len, NULL, 0) == -1) 213 user_memory = -1; 214 215 /* Increase our data size to the max */ 216 if (getrlimit(RLIMIT_DATA, &rl) == 0) { 217 free_memory = (unsigned long long)rl.rlim_cur; 218 rl.rlim_cur = rl.rlim_max; 219 if (setrlimit(RLIMIT_DATA, &rl) == 0) { 220 free_memory = (unsigned long long)rl.rlim_max; 221 } else { 222 warn("Can't set resource limit to max data size"); 223 } 224 } else { 225 free_memory = 1000000; 226 warn("Can't get resource limit for data size"); 227 } 228 229 /* We prefer to use temp files rather than swap space. */ 230 if (user_memory != -1 && free_memory > user_memory) 231 free_memory = user_memory; 232 233 available_free_memory = free_memory / 2; 234 } 235 236 /* 237 * Set directory temporary files. 238 */ 239 static void 240 set_tmpdir(void) 241 { 242 if (!issetugid()) { 243 char *td; 244 245 td = getenv("TMPDIR"); 246 if (td != NULL) 247 tmpdir = td; 248 } 249 } 250 251 /* 252 * Parse -S option. 253 */ 254 static unsigned long long 255 parse_memory_buffer_value(const char *value) 256 { 257 char *endptr; 258 unsigned long long membuf; 259 260 membuf = strtoll(value, &endptr, 10); 261 if (endptr == value || (long long)membuf < 0 || 262 (errno == ERANGE && membuf == LLONG_MAX)) 263 goto invalid; 264 265 switch (*endptr) { 266 case 'Y': 267 if (membuf > ULLONG_MAX / 1024) 268 goto invalid; 269 membuf *= 1024; 270 /* FALLTHROUGH */ 271 case 'Z': 272 if (membuf > ULLONG_MAX / 1024) 273 goto invalid; 274 membuf *= 1024; 275 /* FALLTHROUGH */ 276 case 'E': 277 if (membuf > ULLONG_MAX / 1024) 278 goto invalid; 279 membuf *= 1024; 280 /* FALLTHROUGH */ 281 case 'P': 282 if (membuf > ULLONG_MAX / 1024) 283 goto invalid; 284 membuf *= 1024; 285 /* FALLTHROUGH */ 286 case 'T': 287 if (membuf > ULLONG_MAX / 1024) 288 goto invalid; 289 membuf *= 1024; 290 /* FALLTHROUGH */ 291 case 'G': 292 if (membuf > ULLONG_MAX / 1024) 293 goto invalid; 294 membuf *= 1024; 295 /* FALLTHROUGH */ 296 case 'M': 297 if (membuf > ULLONG_MAX / 1024) 298 goto invalid; 299 membuf *= 1024; 300 /* FALLTHROUGH */ 301 case '\0': 302 case 'K': 303 if (membuf > ULLONG_MAX / 1024) 304 goto invalid; 305 membuf *= 1024; 306 /* FALLTHROUGH */ 307 case 'b': 308 break; 309 case '%': 310 if (available_free_memory != 0 && 311 membuf > ULLONG_MAX / available_free_memory) 312 goto invalid; 313 membuf = (available_free_memory * membuf) / 314 100; 315 break; 316 default: 317 warnc(EINVAL, "%s", optarg); 318 membuf = available_free_memory; 319 } 320 if (membuf > SIZE_MAX) 321 goto invalid; 322 return membuf; 323 invalid: 324 errx(2, "invalid memory buffer size: %s", value); 325 } 326 327 /* 328 * Signal handler that clears the temporary files. 329 */ 330 static void 331 sig_handler(int sig __unused) 332 { 333 clear_tmp_files(); 334 _exit(2); 335 } 336 337 /* 338 * Set signal handler on panic signals. 339 */ 340 static void 341 set_signal_handler(void) 342 { 343 struct sigaction sa; 344 int i, signals[] = {SIGTERM, SIGHUP, SIGINT, SIGUSR1, SIGUSR2, 345 SIGPIPE, SIGXCPU, SIGXFSZ, 0}; 346 347 memset(&sa, 0, sizeof(sa)); 348 sigfillset(&sa.sa_mask); 349 sa.sa_flags = SA_RESTART; 350 sa.sa_handler = sig_handler; 351 352 for (i = 0; signals[i] != 0; i++) { 353 if (sigaction(signals[i], &sa, NULL) == -1) { 354 warn("sigaction(%s)", strsignal(signals[i])); 355 continue; 356 } 357 } 358 } 359 360 /* 361 * Print "unknown" message and exit with status 2. 362 */ 363 static void 364 unknown(const char *what) 365 { 366 errx(2, "Unknown feature: %s", what); 367 } 368 369 /* 370 * Check whether contradictory input options are used. 371 */ 372 static void 373 check_mutually_exclusive_flags(char c, bool *mef_flags) 374 { 375 int i, fo_index, mec; 376 bool found_others, found_this; 377 378 found_others = found_this = false; 379 fo_index = 0; 380 381 for (i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) { 382 mec = mutually_exclusive_flags[i]; 383 384 if (mec != c) { 385 if (mef_flags[i]) { 386 if (found_this) { 387 errx(2, 388 "%c:%c: mutually exclusive flags", 389 c, mec); 390 } 391 found_others = true; 392 fo_index = i; 393 } 394 } else { 395 if (found_others) { 396 errx(2, 397 "%c:%c: mutually exclusive flags", 398 c, mutually_exclusive_flags[fo_index]); 399 } 400 mef_flags[i] = true; 401 found_this = true; 402 } 403 } 404 } 405 406 /* 407 * Initialise sort opts data. 408 */ 409 static void 410 set_sort_opts(void) 411 { 412 memset(&default_sort_mods_object, 0, 413 sizeof(default_sort_mods_object)); 414 memset(&sort_opts_vals, 0, sizeof(sort_opts_vals)); 415 default_sort_mods_object.func = 416 get_sort_func(&default_sort_mods_object); 417 } 418 419 /* 420 * Set a sort modifier on a sort modifiers object. 421 */ 422 static bool 423 set_sort_modifier(struct sort_mods *sm, int c) 424 { 425 switch (c) { 426 case 'b': 427 sm->bflag = true; 428 break; 429 case 'd': 430 sm->dflag = true; 431 break; 432 case 'f': 433 sm->fflag = true; 434 break; 435 case 'g': 436 sm->gflag = true; 437 need_hint = true; 438 break; 439 case 'i': 440 sm->iflag = true; 441 break; 442 case 'R': 443 sm->Rflag = true; 444 need_random = true; 445 break; 446 case 'M': 447 initialise_months(); 448 sm->Mflag = true; 449 need_hint = true; 450 break; 451 case 'n': 452 sm->nflag = true; 453 need_hint = true; 454 break; 455 case 'r': 456 sm->rflag = true; 457 break; 458 case 'V': 459 sm->Vflag = true; 460 break; 461 case 'h': 462 sm->hflag = true; 463 need_hint = true; 464 break; 465 default: 466 return false; 467 } 468 sort_opts_vals.complex_sort = true; 469 sm->func = get_sort_func(sm); 470 471 return true; 472 } 473 474 /* 475 * Parse POS in -k option. 476 */ 477 static int 478 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second) 479 { 480 regmatch_t pmatch[4]; 481 regex_t re; 482 char *c, *f; 483 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$"; 484 size_t len, nmatch; 485 int ret; 486 487 ret = -1; 488 nmatch = 4; 489 c = f = NULL; 490 491 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 492 return -1; 493 494 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 495 goto end; 496 497 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 498 goto end; 499 500 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 501 goto end; 502 503 len = pmatch[1].rm_eo - pmatch[1].rm_so; 504 505 f = sort_malloc(len + 1); 506 memcpy(f, s + pmatch[1].rm_so, len); 507 f[len] = '\0'; 508 509 if (second) { 510 errno = 0; 511 ks->f2 = (size_t)strtoul(f, NULL, 10); 512 if (errno != 0) 513 goto end; 514 if (ks->f2 == 0) { 515 warn("0 field in key specs"); 516 goto end; 517 } 518 } else { 519 errno = 0; 520 ks->f1 = (size_t)strtoul(f, NULL, 10); 521 if (errno != 0) 522 goto end; 523 if (ks->f1 == 0) { 524 warn("0 field in key specs"); 525 goto end; 526 } 527 } 528 529 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 530 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 531 532 c = sort_malloc(len + 1); 533 memcpy(c, s + pmatch[2].rm_so + 1, len); 534 c[len] = '\0'; 535 536 if (second) { 537 errno = 0; 538 ks->c2 = (size_t)strtoul(c, NULL, 10); 539 if (errno != 0) 540 goto end; 541 } else { 542 errno = 0; 543 ks->c1 = (size_t)strtoul(c, NULL, 10); 544 if (errno != 0) 545 goto end; 546 if (ks->c1 == 0) { 547 warn("0 column in key specs"); 548 goto end; 549 } 550 } 551 } else { 552 if (second) 553 ks->c2 = 0; 554 else 555 ks->c1 = 1; 556 } 557 558 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 559 regoff_t i = 0; 560 561 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) { 562 check_mutually_exclusive_flags(s[i], mef_flags); 563 if (s[i] == 'b') { 564 if (second) 565 ks->pos2b = true; 566 else 567 ks->pos1b = true; 568 } else if (!set_sort_modifier(&(ks->sm), s[i])) 569 goto end; 570 } 571 } 572 573 ret = 0; 574 575 end: 576 sort_free(c); 577 sort_free(f); 578 regfree(&re); 579 580 return ret; 581 } 582 583 /* 584 * Parse -k option value. 585 */ 586 static int 587 parse_k(const char *s, struct key_specs *ks) 588 { 589 int ret = -1; 590 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 591 { false, false, false, false, false, false }; 592 593 if (*s != '\0') { 594 char *sptr; 595 596 sptr = strchr(s, ','); 597 if (sptr) { 598 size_t size1; 599 char *pos1, *pos2; 600 601 size1 = sptr - s; 602 603 if (size1 < 1) 604 return -1; 605 606 pos1 = sort_malloc(size1 + 1); 607 memcpy(pos1, s, size1); 608 pos1[size1] = '\0'; 609 610 ret = parse_pos(pos1, ks, mef_flags, false); 611 612 sort_free(pos1); 613 if (ret < 0) 614 return ret; 615 616 pos2 = sort_strdup(sptr + 1); 617 ret = parse_pos(pos2, ks, mef_flags, true); 618 sort_free(pos2); 619 } else 620 ret = parse_pos(s, ks, mef_flags, false); 621 } 622 623 return ret; 624 } 625 626 /* 627 * Parse POS in +POS -POS option. 628 */ 629 static int 630 parse_pos_obs(const char *s, size_t *nf, size_t *nc, char *sopts, size_t sopts_size) 631 { 632 regex_t re; 633 regmatch_t pmatch[4]; 634 char *c, *f; 635 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$"; 636 int ret; 637 size_t len, nmatch; 638 639 ret = -1; 640 nmatch = 4; 641 c = f = NULL; 642 *nc = *nf = 0; 643 644 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 645 return -1; 646 647 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 648 goto end; 649 650 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 651 goto end; 652 653 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 654 goto end; 655 656 len = pmatch[1].rm_eo - pmatch[1].rm_so; 657 658 f = sort_malloc(len + 1); 659 memcpy(f, s + pmatch[1].rm_so, len); 660 f[len] = '\0'; 661 662 errno = 0; 663 *nf = (size_t)strtoul(f, NULL, 10); 664 if (errno != 0) 665 errx(2, "Invalid key position"); 666 667 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 668 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 669 670 c = sort_malloc(len + 1); 671 memcpy(c, s + pmatch[2].rm_so + 1, len); 672 c[len] = '\0'; 673 674 errno = 0; 675 *nc = (size_t)strtoul(c, NULL, 10); 676 if (errno != 0) 677 errx(2, "Invalid key position"); 678 } 679 680 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 681 682 len = pmatch[3].rm_eo - pmatch[3].rm_so; 683 684 if (len >= sopts_size) 685 errx(2, "Invalid key position"); 686 memcpy(sopts, s + pmatch[3].rm_so, len); 687 sopts[len] = '\0'; 688 } 689 690 ret = 0; 691 692 end: 693 sort_free(c); 694 sort_free(f); 695 regfree(&re); 696 697 return ret; 698 } 699 700 /* 701 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax 702 */ 703 static void 704 fix_obsolete_keys(int *argc, char **argv) 705 { 706 char sopt[129]; 707 int i; 708 709 for (i = 1; i < *argc; i++) { 710 const char *arg1 = argv[i]; 711 712 if (arg1[0] == '+') { 713 size_t c1, f1; 714 char sopts1[128]; 715 716 sopts1[0] = 0; 717 c1 = f1 = 0; 718 719 if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1, 720 sizeof(sopts1)) < 0) 721 continue; 722 723 f1 += 1; 724 c1 += 1; 725 if (i + 1 < *argc) { 726 const char *arg2 = argv[i + 1]; 727 728 if (arg2[0] == '-') { 729 size_t c2, f2; 730 char sopts2[128]; 731 732 sopts2[0] = 0; 733 c2 = f2 = 0; 734 735 if (parse_pos_obs(arg2 + 1, &f2, &c2, 736 sopts2, sizeof(sopts2)) >= 0) { 737 int j; 738 if (c2 > 0) 739 f2 += 1; 740 snprintf(sopt, sizeof(sopt), 741 "-k%zu.%zu%s,%zu.%zu%s", 742 f1, c1, sopts1, f2, 743 c2, sopts2); 744 argv[i] = sort_strdup(sopt); 745 for (j = i + 1; j + 1 < *argc; j++) 746 argv[j] = argv[j + 1]; 747 *argc -= 1; 748 continue; 749 } 750 } 751 } 752 snprintf(sopt, sizeof(sopt), "-k%zu.%zu%s", 753 f1, c1, sopts1); 754 argv[i] = sort_strdup(sopt); 755 } 756 } 757 } 758 759 /* 760 * Set random seed 761 */ 762 static void 763 set_random_seed(void) 764 { 765 if (!need_random) 766 return; 767 768 MD5Init(&md5_ctx); 769 if (random_source != NULL) { 770 unsigned char buf[BUFSIZ]; 771 size_t nr; 772 FILE *fp; 773 774 if ((fp = fopen(random_source, "r")) == NULL) 775 err(2, "%s", random_source); 776 while ((nr = fread(buf, 1, sizeof(buf), fp)) != 0) 777 MD5Update(&md5_ctx, buf, nr); 778 if (ferror(fp)) 779 err(2, "%s", random_source); 780 fclose(fp); 781 } else { 782 unsigned char rsd[1024]; 783 784 arc4random_buf(rsd, sizeof(rsd)); 785 MD5Update(&md5_ctx, rsd, sizeof(rsd)); 786 } 787 } 788 789 /* 790 * Main function. 791 */ 792 int 793 main(int argc, char *argv[]) 794 { 795 char *outfile, *real_outfile, *sflag; 796 int c; 797 size_t i; 798 struct sort_mods *sm = &default_sort_mods_object; 799 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 800 { false, false, false, false, false, false }; 801 802 set_hw_params(); 803 804 if (pledge("stdio rpath wpath cpath fattr chown proc exec", NULL) == -1) 805 err(2, "pledge"); 806 807 outfile = "-"; 808 real_outfile = NULL; 809 sflag = NULL; 810 811 init_tmp_files(); 812 813 set_signal_handler(); 814 815 atexit(clear_tmp_files); 816 817 set_tmpdir(); 818 set_sort_opts(); 819 820 fix_obsolete_keys(&argc, argv); 821 822 while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL)) 823 != -1)) { 824 825 check_mutually_exclusive_flags(c, mef_flags); 826 827 if (!set_sort_modifier(sm, c)) { 828 switch (c) { 829 case 'c': 830 sort_opts_vals.cflag = true; 831 if (optarg) { 832 if (!strcmp(optarg, "diagnose-first")) 833 ; 834 else if (!strcmp(optarg, "silent") || 835 !strcmp(optarg, "quiet")) 836 sort_opts_vals.csilentflag = true; 837 else if (*optarg) 838 unknown(optarg); 839 } 840 break; 841 case 'C': 842 sort_opts_vals.cflag = true; 843 sort_opts_vals.csilentflag = true; 844 break; 845 case 'k': 846 { 847 sort_opts_vals.complex_sort = true; 848 sort_opts_vals.kflag = true; 849 850 keys = sort_reallocarray(keys, keys_num + 1, 851 sizeof(struct key_specs)); 852 memset(&(keys[keys_num]), 0, 853 sizeof(struct key_specs)); 854 #ifndef GNUSORT_COMPATIBILITY 855 keys[keys_num].pos1b = default_sort_mods->bflag; 856 keys[keys_num].pos2b = default_sort_mods->bflag; 857 #endif 858 859 if (parse_k(optarg, &(keys[keys_num++])) < 0) 860 errc(2, EINVAL, "-k %s", optarg); 861 862 break; 863 } 864 case 'm': 865 sort_opts_vals.mflag = true; 866 break; 867 case 'o': 868 outfile = optarg; 869 break; 870 case 's': 871 sort_opts_vals.sflag = true; 872 break; 873 case 'S': 874 sflag = optarg; 875 break; 876 case 'T': 877 tmpdir = optarg; 878 break; 879 case 't': 880 while (strlen(optarg) > 1) { 881 if (optarg[0] != '\\') { 882 errc(2, EINVAL, "%s", optarg); 883 } 884 optarg += 1; 885 if (*optarg == '0') { 886 *optarg = 0; 887 break; 888 } 889 } 890 sort_opts_vals.tflag = true; 891 sort_opts_vals.field_sep = btowc(optarg[0]); 892 if (sort_opts_vals.field_sep == WEOF) { 893 errno = EINVAL; 894 err(2, NULL); 895 } 896 break; 897 case 'u': 898 sort_opts_vals.uflag = true; 899 /* stable sort for the correct unique val */ 900 sort_opts_vals.sflag = true; 901 break; 902 case 'z': 903 sort_opts_vals.zflag = true; 904 break; 905 case SORT_OPT: 906 if (!strcmp(optarg, "general-numeric")) 907 set_sort_modifier(sm, 'g'); 908 else if (!strcmp(optarg, "human-numeric")) 909 set_sort_modifier(sm, 'h'); 910 else if (!strcmp(optarg, "numeric")) 911 set_sort_modifier(sm, 'n'); 912 else if (!strcmp(optarg, "month")) 913 set_sort_modifier(sm, 'M'); 914 else if (!strcmp(optarg, "random")) 915 set_sort_modifier(sm, 'R'); 916 else 917 unknown(optarg); 918 break; 919 case QSORT_OPT: 920 sort_opts_vals.sort_method = SORT_QSORT; 921 break; 922 case 'H': 923 sort_opts_vals.sort_method = SORT_MERGESORT; 924 break; 925 case MMAP_OPT: 926 use_mmap = true; 927 break; 928 case HEAPSORT_OPT: 929 sort_opts_vals.sort_method = SORT_HEAPSORT; 930 break; 931 case RADIXSORT_OPT: 932 sort_opts_vals.sort_method = SORT_RADIXSORT; 933 break; 934 case RANDOMSOURCE_OPT: 935 random_source = optarg; 936 break; 937 case COMPRESSPROGRAM_OPT: 938 compress_program = optarg; 939 break; 940 case FF_OPT: 941 read_fns_from_file0(optarg); 942 break; 943 case BS_OPT: 944 { 945 const char *errstr; 946 947 max_open_files = strtonum(optarg, 2, 948 UINT_MAX - 1, &errstr) + 1; 949 if (errstr != NULL) 950 errx(2, "--batch-size argument is %s", 951 errstr); 952 break; 953 } 954 case VERSION_OPT: 955 printf("%s\n", VERSION); 956 exit(EXIT_SUCCESS); 957 /* NOTREACHED */ 958 break; 959 case DEBUG_OPT: 960 debug_sort = true; 961 break; 962 case HELP_OPT: 963 usage(0); 964 /* NOTREACHED */ 965 break; 966 default: 967 usage(2); 968 /* NOTREACHED */ 969 } 970 } 971 } 972 argc -= optind; 973 argv += optind; 974 975 if (compress_program == NULL) { 976 if (pledge("stdio rpath wpath cpath fattr chown", NULL) == -1) 977 err(2, "pledge"); 978 } 979 980 #ifndef GNUSORT_COMPATIBILITY 981 if (argc > 2 && strcmp(argv[argc - 2], "-o") == 0) { 982 outfile = argv[argc - 1]; 983 argc -= 2; 984 } 985 #endif 986 987 if (argv_from_file0) { 988 argc = argc_from_file0; 989 argv = argv_from_file0; 990 } 991 992 if (sort_opts_vals.cflag) { 993 if (argc > 1) 994 errx(2, "only one input file is allowed with the -%c flag", 995 sort_opts_vals.csilentflag ? 'C' : 'c'); 996 997 if (argc == 0 || strcmp(argv[0], "-") == 0) { 998 if (compress_program) { 999 if (pledge("stdio proc exec", NULL) == -1) 1000 err(2, "pledge"); 1001 } else { 1002 if (pledge("stdio", NULL) == -1) 1003 err(2, "pledge"); 1004 } 1005 } else { 1006 if (compress_program) { 1007 if (pledge("stdio rpath proc exec", NULL) == -1) 1008 err(2, "pledge"); 1009 } else { 1010 if (pledge("stdio rpath", NULL) == -1) 1011 err(2, "pledge"); 1012 } 1013 } 1014 } else { 1015 /* Case when the outfile equals one of the input files: */ 1016 if (strcmp(outfile, "-") != 0) { 1017 struct stat sb; 1018 int fd, i; 1019 1020 for (i = 0; i < argc; ++i) { 1021 if (strcmp(argv[i], outfile) == 0) { 1022 if (stat(outfile, &sb) == -1) 1023 err(2, "%s", outfile); 1024 if (access(outfile, W_OK) == -1) 1025 err(2, "%s", outfile); 1026 real_outfile = outfile; 1027 sort_asprintf(&outfile, "%s.XXXXXXXXXX", 1028 real_outfile); 1029 if ((fd = mkstemp(outfile)) == -1) 1030 err(2, "%s", outfile); 1031 (void)fchown(fd, sb.st_uid, sb.st_gid); 1032 if (fchmod(fd, sb.st_mode & ACCESSPERMS) == -1) 1033 err(2, "%s", outfile); 1034 close(fd); 1035 tmp_file_atexit(outfile); 1036 break; 1037 } 1038 } 1039 } 1040 1041 if (compress_program) { 1042 if (pledge("stdio rpath wpath cpath proc exec", NULL) == -1) 1043 err(2, "pledge"); 1044 } else { 1045 if (pledge("stdio rpath wpath cpath", NULL) == -1) 1046 err(2, "pledge"); 1047 } 1048 } 1049 1050 if (sflag != NULL) 1051 available_free_memory = parse_memory_buffer_value(sflag); 1052 1053 if (keys_num == 0) { 1054 keys_num = 1; 1055 keys = sort_reallocarray(keys, 1, sizeof(struct key_specs)); 1056 memset(&(keys[0]), 0, sizeof(struct key_specs)); 1057 keys[0].c1 = 1; 1058 #ifdef GNUSORT_COMPATIBILITY 1059 keys[0].pos1b = sm->bflag; 1060 keys[0].pos2b = sm->bflag; 1061 #endif 1062 memcpy(&(keys[0].sm), sm, sizeof(struct sort_mods)); 1063 } 1064 1065 for (i = 0; i < keys_num; i++) { 1066 struct key_specs *ks; 1067 1068 ks = &(keys[i]); 1069 1070 if (sort_modifier_empty(&(ks->sm))) { 1071 #ifdef GNUSORT_COMPATIBILITY 1072 if (!(ks->pos1b) && !(ks->pos2b)) { 1073 ks->pos1b = sm->bflag; 1074 ks->pos2b = sm->bflag; 1075 } 1076 #endif 1077 memcpy(&(ks->sm), sm, sizeof(struct sort_mods)); 1078 } 1079 1080 ks->sm.func = get_sort_func(&(ks->sm)); 1081 } 1082 1083 if (debug_sort) 1084 printf("Memory to be used for sorting: %llu\n", 1085 available_free_memory); 1086 1087 if (sort_opts_vals.cflag) 1088 return check(argc ? *argv : "-"); 1089 1090 set_random_seed(); 1091 1092 if (!sort_opts_vals.mflag) { 1093 struct file_list fl; 1094 struct sort_list list; 1095 1096 sort_list_init(&list); 1097 file_list_init(&fl, true); 1098 1099 if (argc < 1) 1100 procfile("-", &list, &fl); 1101 else { 1102 while (argc > 0) { 1103 procfile(*argv, &list, &fl); 1104 --argc; 1105 ++argv; 1106 } 1107 } 1108 1109 if (fl.count < 1) 1110 sort_list_to_file(&list, outfile); 1111 else { 1112 if (list.count > 0) { 1113 char *flast = new_tmp_file_name(); 1114 1115 sort_list_to_file(&list, flast); 1116 file_list_add(&fl, flast, false); 1117 } 1118 merge_files(&fl, outfile); 1119 } 1120 1121 file_list_clean(&fl); 1122 1123 /* 1124 * We are about to exit the program, so we can ignore 1125 * the clean-up for speed 1126 * 1127 * sort_list_clean(&list); 1128 */ 1129 1130 } else { 1131 struct file_list fl; 1132 1133 file_list_init(&fl, false); 1134 if (argc < 1) 1135 file_list_add(&fl, "-", true); 1136 else 1137 file_list_populate(&fl, argc, argv, true); 1138 merge_files(&fl, outfile); 1139 file_list_clean(&fl); 1140 } 1141 1142 if (real_outfile) { 1143 if (rename(outfile, real_outfile) == -1) 1144 err(2, "%s", real_outfile); 1145 sort_free(outfile); 1146 } 1147 1148 return 0; 1149 } 1150