1 /* $NetBSD: texindex.c,v 1.3 2024/04/07 12:30:38 christos Exp $ */ 2 3 /* texindex -- sort TeX index dribble output into an actual index. 4 Id: texindex.c,v 1.11 2004/04/11 17:56:47 karl Exp 5 6 Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001, 7 2002, 2003, 2004 Free Software Foundation, Inc. 8 9 This program is free software; you can redistribute it and/or modify 10 it under the terms of the GNU General Public License as published by 11 the Free Software Foundation; either version 2, or (at your option) 12 any later version. 13 14 This program is distributed in the hope that it will be useful, 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 GNU General Public License for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with this program; if not, write to the Free Software 21 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */ 22 23 #include "system.h" 24 #include <getopt.h> 25 26 static char *program_name = "texindex"; 27 28 #if defined (emacs) 29 # include "../src/config.h" 30 /* Some s/os.h files redefine these. */ 31 # undef read 32 # undef close 33 # undef write 34 # undef open 35 #endif 36 37 #if !defined (HAVE_MEMSET) 38 #undef memset 39 #define memset(ptr, ignore, count) bzero (ptr, count) 40 #endif 41 42 #if !defined (SEEK_SET) 43 # define SEEK_SET 0 44 # define SEEK_CUR 1 45 # define SEEK_END 2 46 #endif /* !SEEK_SET */ 47 48 /* When sorting in core, this structure describes one line 49 and the position and length of its first keyfield. */ 50 struct lineinfo 51 { 52 char *text; /* The actual text of the line. */ 53 union { 54 char *text; /* The start of the key (for textual comparison). */ 55 long number; /* The numeric value (for numeric comparison). */ 56 } key; 57 long keylen; /* Length of KEY field. */ 58 size_t idx; /* tie breaker */ 59 }; 60 61 /* This structure describes a field to use as a sort key. */ 62 struct keyfield 63 { 64 int startwords; /* Number of words to skip. */ 65 int startchars; /* Number of additional chars to skip. */ 66 int endwords; /* Number of words to ignore at end. */ 67 int endchars; /* Ditto for characters of last word. */ 68 char ignore_blanks; /* Non-zero means ignore spaces and tabs. */ 69 char fold_case; /* Non-zero means case doesn't matter. */ 70 char reverse; /* Non-zero means compare in reverse order. */ 71 char numeric; /* Non-zeros means field is ASCII numeric. */ 72 char positional; /* Sort according to file position. */ 73 char braced; /* Count balanced-braced groupings as fields. */ 74 }; 75 76 /* Vector of keyfields to use. */ 77 struct keyfield keyfields[3]; 78 79 /* Number of keyfields stored in that vector. */ 80 int num_keyfields = 3; 81 82 /* Vector of input file names, terminated with a null pointer. */ 83 char **infiles; 84 85 /* Vector of corresponding output file names, or NULL, meaning default it 86 (add an `s' to the end). */ 87 char **outfiles; 88 89 /* Length of `infiles'. */ 90 int num_infiles; 91 92 /* Pointer to the array of pointers to lines being sorted. */ 93 char **linearray; 94 95 /* The allocated length of `linearray'. */ 96 long nlines; 97 98 /* During in-core sort, this points to the base of the data block 99 which contains all the lines of data. */ 100 char *text_base; 101 102 /* Initially 0; changed to 1 if we want initials in this index. */ 103 int need_initials; 104 105 /* Remembers the first initial letter seen in this index, so we can 106 determine whether we need initials in the sorted form. */ 107 char first_initial; 108 109 /* Forward declarations of functions in this file. */ 110 void decode_command (int argc, char **argv); 111 void sort_in_core (char *infile, int total, char *outfile); 112 char **parsefile (char *filename, char **nextline, char *data, long int size); 113 char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr); 114 char *find_pos (char *str, int words, int chars, int ignore_blanks); 115 long find_value (char *start, long int length); 116 char *find_braced_pos (char *str, int words, int chars, int ignore_blanks); 117 char *find_braced_end (char *str); 118 void writelines (char **linearray, int nlines, FILE *ostream); 119 int compare_field (struct keyfield *keyfield, char *start1, 120 long int length1, long int pos1, char *start2, 121 long int length2, long int pos2); 122 int compare_full (const void *, const void *); 123 void pfatal_with_name (const char *name); 124 void fatal (const char *format, const char *arg); 125 void error (const char *format, const char *arg); 126 void *xmalloc (), *xrealloc (); 127 static char *concat3 (const char *, const char *, const char *); 128 129 int 130 main (int argc, char **argv) 131 { 132 int i; 133 134 #ifdef HAVE_SETLOCALE 135 /* Set locale via LC_ALL. */ 136 setlocale (LC_ALL, ""); 137 #endif 138 139 /* Set the text message domain. */ 140 bindtextdomain (PACKAGE, LOCALEDIR); 141 textdomain (PACKAGE); 142 143 /* In case we write to a redirected stdout that fails. */ 144 /* not ready atexit (close_stdout); */ 145 146 /* Describe the kind of sorting to do. */ 147 /* The first keyfield uses the first braced field and folds case. */ 148 keyfields[0].braced = 1; 149 keyfields[0].fold_case = 1; 150 keyfields[0].endwords = -1; 151 keyfields[0].endchars = -1; 152 153 /* The second keyfield uses the second braced field, numerically. */ 154 keyfields[1].braced = 1; 155 keyfields[1].numeric = 1; 156 keyfields[1].startwords = 1; 157 keyfields[1].endwords = -1; 158 keyfields[1].endchars = -1; 159 160 /* The third keyfield (which is ignored while discarding duplicates) 161 compares the whole line. */ 162 keyfields[2].endwords = -1; 163 keyfields[2].endchars = -1; 164 165 decode_command (argc, argv); 166 167 /* Process input files completely, one by one. */ 168 169 for (i = 0; i < num_infiles; i++) 170 { 171 int desc; 172 off_t ptr; 173 char *outfile; 174 struct stat instat; 175 176 desc = open (infiles[i], O_RDONLY, 0); 177 if (desc < 0) 178 pfatal_with_name (infiles[i]); 179 180 if (stat (infiles[i], &instat)) 181 pfatal_with_name (infiles[i]); 182 if (S_ISDIR (instat.st_mode)) 183 { 184 #ifdef EISDIR 185 errno = EISDIR; 186 #endif 187 pfatal_with_name (infiles[i]); 188 } 189 190 lseek (desc, (off_t) 0, SEEK_END); 191 ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR); 192 193 close (desc); 194 195 outfile = outfiles[i]; 196 if (!outfile) 197 outfile = concat3 (infiles[i], "s", ""); 198 199 need_initials = 0; 200 first_initial = '\0'; 201 202 if (ptr != (int)ptr) 203 { 204 fprintf (stderr, "%s: %s: file too large\n", program_name, 205 infiles[i]); 206 xexit (1); 207 } 208 sort_in_core (infiles[i], (int)ptr, outfile); 209 } 210 211 xexit (0); 212 return 0; /* Avoid bogus warnings. */ 213 } 214 215 typedef struct 216 { 217 char *long_name; 218 char *short_name; 219 int *variable_ref; 220 int variable_value; 221 char *arg_name; 222 char *doc_string; 223 } TEXINDEX_OPTION; 224 225 TEXINDEX_OPTION texindex_options[] = { 226 { "--help", "-h", (int *)NULL, 0, (char *)NULL, 227 N_("display this help and exit") }, 228 { "--output", "-o", (int *)NULL, 0, "FILE", 229 N_("send output to FILE") }, 230 { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL, 231 N_("display version information and exit") }, 232 { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL } 233 }; 234 235 void 236 usage (int result_value) 237 { 238 register int i; 239 FILE *f = result_value ? stderr : stdout; 240 241 fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name); 242 fprintf (f, _("Generate a sorted index for each TeX output FILE.\n")); 243 /* Avoid trigraph nonsense. */ 244 fprintf (f, 245 _("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"), 246 '?', '?'); /* avoid trigraph in cat-id-tbl.c */ 247 fprintf (f, _("\nOptions:\n")); 248 249 for (i = 0; texindex_options[i].long_name; i++) 250 { 251 putc (' ', f); 252 253 if (texindex_options[i].short_name) 254 fprintf (f, "%s, ", texindex_options[i].short_name); 255 256 fprintf (f, "%s %s", 257 texindex_options[i].long_name, 258 texindex_options[i].arg_name 259 ? texindex_options[i].arg_name : ""); 260 261 fprintf (f, "\t%s\n", _(texindex_options[i].doc_string)); 262 } 263 fputs (_("\n\ 264 Email bug reports to bug-texinfo@gnu.org,\n\ 265 general questions and discussion to help-texinfo@gnu.org.\n\ 266 Texinfo home page: http://www.gnu.org/software/texinfo/"), f); 267 fputs ("\n", f); 268 269 xexit (result_value); 270 } 271 272 /* Decode the command line arguments to set the parameter variables 273 and set up the vector of keyfields and the vector of input files. */ 274 275 void 276 decode_command (int argc, char **argv) 277 { 278 int arg_index = 1; 279 char **ip; 280 char **op; 281 282 /* Allocate ARGC input files, which must be enough. */ 283 284 infiles = (char **) xmalloc (argc * sizeof (char *)); 285 outfiles = (char **) xmalloc (argc * sizeof (char *)); 286 ip = infiles; 287 op = outfiles; 288 289 while (arg_index < argc) 290 { 291 char *arg = argv[arg_index++]; 292 293 if (*arg == '-') 294 { 295 if (strcmp (arg, "--version") == 0) 296 { 297 printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION); 298 puts (""); 299 puts ("Copyright (C) 2004 Free Software Foundation, Inc."); 300 printf (_("There is NO warranty. You may redistribute this software\n\ 301 under the terms of the GNU General Public License.\n\ 302 For more information about these matters, see the files named COPYING.\n")); 303 xexit (0); 304 } 305 else if ((strcmp (arg, "--keep") == 0) || 306 (strcmp (arg, "-k") == 0)) 307 { 308 /* Ignore, for backward compatibility */ 309 } 310 else if ((strcmp (arg, "--help") == 0) || 311 (strcmp (arg, "-h") == 0)) 312 { 313 usage (0); 314 } 315 else if ((strcmp (arg, "--output") == 0) || 316 (strcmp (arg, "-o") == 0)) 317 { 318 if (argv[arg_index] != (char *)NULL) 319 { 320 arg_index++; 321 if (op > outfiles) 322 *(op - 1) = argv[arg_index]; 323 } 324 else 325 usage (1); 326 } 327 else 328 usage (1); 329 } 330 else 331 { 332 *ip++ = arg; 333 *op++ = (char *)NULL; 334 } 335 } 336 337 /* Record number of keyfields and terminate list of filenames. */ 338 num_infiles = ip - infiles; 339 *ip = (char *)NULL; 340 if (num_infiles == 0) 341 usage (1); 342 } 343 344 /* Compare LINE1 and LINE2 according to the specified set of keyfields. */ 345 346 int 347 compare_full (const void *p1, const void *p2) 348 { 349 char **line1 = (char **) p1; 350 char **line2 = (char **) p2; 351 int i; 352 353 /* Compare using the first keyfield; 354 if that does not distinguish the lines, try the second keyfield; 355 and so on. */ 356 357 for (i = 0; i < num_keyfields; i++) 358 { 359 long length1, length2; 360 char *start1 = find_field (&keyfields[i], *line1, &length1); 361 char *start2 = find_field (&keyfields[i], *line2, &length2); 362 int tem = compare_field (&keyfields[i], start1, length1, 363 *line1 - text_base, 364 start2, length2, *line2 - text_base); 365 if (tem) 366 { 367 if (keyfields[i].reverse) 368 return -tem; 369 return tem; 370 } 371 } 372 373 if (*line1 == *line2) 374 abort (); 375 return *line1 < *line2 ? -1 : 1; 376 } 377 378 /* Compare LINE1 and LINE2, described by structures 379 in which the first keyfield is identified in advance. 380 For positional sorting, assumes that the order of the lines in core 381 reflects their nominal order. */ 382 int 383 compare_prepared (const void *p1, const void *p2) 384 { 385 struct lineinfo *line1 = (struct lineinfo *) p1; 386 struct lineinfo *line2 = (struct lineinfo *) p2; 387 int i; 388 int tem; 389 char *text1, *text2; 390 391 /* Compare using the first keyfield, which has been found for us already. */ 392 if (keyfields->positional) 393 { 394 if (line1->text - text_base > line2->text - text_base) 395 tem = 1; 396 else 397 tem = -1; 398 } 399 else if (keyfields->numeric) 400 tem = line1->key.number - line2->key.number; 401 else 402 tem = compare_field (keyfields, line1->key.text, line1->keylen, 0, 403 line2->key.text, line2->keylen, 0); 404 if (tem) 405 { 406 if (keyfields->reverse) 407 return -tem; 408 return tem; 409 } 410 411 text1 = line1->text; 412 text2 = line2->text; 413 414 /* Compare using the second keyfield; 415 if that does not distinguish the lines, try the third keyfield; 416 and so on. */ 417 418 for (i = 1; i < num_keyfields; i++) 419 { 420 long length1, length2; 421 char *start1 = find_field (&keyfields[i], text1, &length1); 422 char *start2 = find_field (&keyfields[i], text2, &length2); 423 int tem = compare_field (&keyfields[i], start1, length1, 424 text1 - text_base, 425 start2, length2, text2 - text_base); 426 if (tem) 427 { 428 if (keyfields[i].reverse) 429 return -tem; 430 return tem; 431 } 432 } 433 434 if (line1->idx == line2->idx) 435 abort (); 436 return line1->idx < line2->idx ? -1 : 1; 437 } 438 439 /* Like compare_full but more general. 440 You can pass any strings, and you can say how many keyfields to use. 441 POS1 and POS2 should indicate the nominal positional ordering of 442 the two lines in the input. */ 443 444 int 445 compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields) 446 { 447 int i; 448 449 /* Compare using the first keyfield; 450 if that does not distinguish the lines, try the second keyfield; 451 and so on. */ 452 453 for (i = 0; i < use_keyfields; i++) 454 { 455 long length1, length2; 456 char *start1 = find_field (&keyfields[i], str1, &length1); 457 char *start2 = find_field (&keyfields[i], str2, &length2); 458 int tem = compare_field (&keyfields[i], start1, length1, pos1, 459 start2, length2, pos2); 460 if (tem) 461 { 462 if (keyfields[i].reverse) 463 return -tem; 464 return tem; 465 } 466 } 467 468 return 0; /* Lines match exactly. */ 469 } 470 471 /* Find the start and length of a field in STR according to KEYFIELD. 472 A pointer to the starting character is returned, and the length 473 is stored into the int that LENGTHPTR points to. */ 474 475 char * 476 find_field (struct keyfield *keyfield, char *str, long int *lengthptr) 477 { 478 char *start; 479 char *end; 480 char *(*fun) (); 481 482 if (keyfield->braced) 483 fun = find_braced_pos; 484 else 485 fun = find_pos; 486 487 start = (*fun) (str, keyfield->startwords, keyfield->startchars, 488 keyfield->ignore_blanks); 489 if (keyfield->endwords < 0) 490 { 491 if (keyfield->braced) 492 end = find_braced_end (start); 493 else 494 { 495 end = start; 496 while (*end && *end != '\n') 497 end++; 498 } 499 } 500 else 501 { 502 end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0); 503 if (end - str < start - str) 504 end = start; 505 } 506 *lengthptr = end - start; 507 return start; 508 } 509 510 /* Return a pointer to a specified place within STR, 511 skipping (from the beginning) WORDS words and then CHARS chars. 512 If IGNORE_BLANKS is nonzero, we skip all blanks 513 after finding the specified word. */ 514 515 char * 516 find_pos (char *str, int words, int chars, int ignore_blanks) 517 { 518 int i; 519 char *p = str; 520 521 for (i = 0; i < words; i++) 522 { 523 char c; 524 /* Find next bunch of nonblanks and skip them. */ 525 while ((c = *p) == ' ' || c == '\t') 526 p++; 527 while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t')) 528 p++; 529 if (!*p || *p == '\n') 530 return p; 531 } 532 533 while (*p == ' ' || *p == '\t') 534 p++; 535 536 for (i = 0; i < chars; i++) 537 { 538 if (!*p || *p == '\n') 539 break; 540 p++; 541 } 542 return p; 543 } 544 545 /* Like find_pos but assumes that each field is surrounded by braces 546 and that braces within fields are balanced. */ 547 548 char * 549 find_braced_pos (char *str, int words, int chars, int ignore_blanks) 550 { 551 int i; 552 int bracelevel; 553 char *p = str; 554 char c; 555 556 for (i = 0; i < words; i++) 557 { 558 bracelevel = 1; 559 while ((c = *p++) != '{' && c != '\n' && c) 560 /* Do nothing. */ ; 561 if (c != '{') 562 return p - 1; 563 while (bracelevel) 564 { 565 c = *p++; 566 if (c == '{') 567 bracelevel++; 568 if (c == '}') 569 bracelevel--; 570 if (c == 0 || c == '\n') 571 return p - 1; 572 } 573 } 574 575 while ((c = *p++) != '{' && c != '\n' && c) 576 /* Do nothing. */ ; 577 578 if (c != '{') 579 return p - 1; 580 581 if (ignore_blanks) 582 while ((c = *p) == ' ' || c == '\t') 583 p++; 584 585 for (i = 0; i < chars; i++) 586 { 587 if (!*p || *p == '\n') 588 break; 589 p++; 590 } 591 return p; 592 } 593 594 /* Find the end of the balanced-brace field which starts at STR. 595 The position returned is just before the closing brace. */ 596 597 char * 598 find_braced_end (char *str) 599 { 600 int bracelevel; 601 char *p = str; 602 char c; 603 604 bracelevel = 1; 605 while (bracelevel) 606 { 607 c = *p++; 608 if (c == '{') 609 bracelevel++; 610 if (c == '}') 611 bracelevel--; 612 if (c == 0 || c == '\n') 613 return p - 1; 614 } 615 return p - 1; 616 } 617 618 long 619 find_value (char *start, long int length) 620 { 621 while (length != 0L) 622 { 623 if (isdigit (*start)) 624 return atol (start); 625 length--; 626 start++; 627 } 628 return 0l; 629 } 630 631 /* Vector used to translate characters for comparison. 632 This is how we make all alphanumerics follow all else, 633 and ignore case in the first sorting. */ 634 int char_order[256]; 635 636 void 637 init_char_order (void) 638 { 639 int i; 640 for (i = 1; i < 256; i++) 641 char_order[i] = i; 642 643 for (i = '0'; i <= '9'; i++) 644 char_order[i] += 512; 645 646 for (i = 'a'; i <= 'z'; i++) 647 { 648 char_order[i] = 512 + i; 649 char_order[i + 'A' - 'a'] = 512 + i; 650 } 651 } 652 653 /* Compare two fields (each specified as a start pointer and a character count) 654 according to KEYFIELD. 655 The sign of the value reports the relation between the fields. */ 656 657 int 658 compare_field (struct keyfield *keyfield, char *start1, long int length1, 659 long int pos1, char *start2, long int length2, long int pos2) 660 { 661 if (keyfields->positional) 662 { 663 if (pos1 > pos2) 664 return 1; 665 else 666 return -1; 667 } 668 if (keyfield->numeric) 669 { 670 long value = find_value (start1, length1) - find_value (start2, length2); 671 if (value > 0) 672 return 1; 673 if (value < 0) 674 return -1; 675 return 0; 676 } 677 else 678 { 679 char *p1 = start1; 680 char *p2 = start2; 681 char *e1 = start1 + length1; 682 char *e2 = start2 + length2; 683 684 while (1) 685 { 686 int c1, c2; 687 688 if (p1 == e1) 689 c1 = 0; 690 else 691 c1 = *p1++; 692 if (p2 == e2) 693 c2 = 0; 694 else 695 c2 = *p2++; 696 697 if (char_order[c1] != char_order[c2]) 698 return char_order[c1] - char_order[c2]; 699 if (!c1) 700 break; 701 } 702 703 /* Strings are equal except possibly for case. */ 704 p1 = start1; 705 p2 = start2; 706 while (1) 707 { 708 int c1, c2; 709 710 if (p1 == e1) 711 c1 = 0; 712 else 713 c1 = *p1++; 714 if (p2 == e2) 715 c2 = 0; 716 else 717 c2 = *p2++; 718 719 if (c1 != c2) 720 /* Reverse sign here so upper case comes out last. */ 721 return c2 - c1; 722 if (!c1) 723 break; 724 } 725 726 return 0; 727 } 728 } 729 730 /* Sort INFILE, whose size is TOTAL, 731 assuming that is small enough to be done in-core, 732 then indexify it and send the output to OUTFILE (or to stdout). */ 733 734 void 735 sort_in_core (char *infile, int total, char *outfile) 736 { 737 char **nextline; 738 char *data = (char *) xmalloc (total + 1); 739 char *file_data; 740 long file_size; 741 int i; 742 FILE *ostream = stdout; 743 struct lineinfo *lineinfo; 744 745 /* Read the contents of the file into the moby array `data'. */ 746 747 int desc = open (infile, O_RDONLY, 0); 748 749 if (desc < 0) 750 fatal (_("failure reopening %s"), infile); 751 for (file_size = 0;;) 752 { 753 i = read (desc, data + file_size, total - file_size); 754 if (i <= 0) 755 break; 756 file_size += i; 757 } 758 file_data = data; 759 data[file_size] = 0; 760 761 close (desc); 762 763 if (file_size > 0 && data[0] != '\\' && data[0] != '@') 764 { 765 error (_("%s: not a texinfo index file"), infile); 766 return; 767 } 768 769 init_char_order (); 770 771 /* Sort routines want to know this address. */ 772 773 text_base = data; 774 775 /* Create the array of pointers to lines, with a default size 776 frequently enough. */ 777 778 nlines = total / 50; 779 if (!nlines) 780 nlines = 2; 781 linearray = (char **) xmalloc (nlines * sizeof (char *)); 782 783 /* `nextline' points to the next free slot in this array. 784 `nlines' is the allocated size. */ 785 786 nextline = linearray; 787 788 /* Parse the input file's data, and make entries for the lines. */ 789 790 nextline = parsefile (infile, nextline, file_data, file_size); 791 if (nextline == 0) 792 { 793 error (_("%s: not a texinfo index file"), infile); 794 return; 795 } 796 797 /* Sort the lines. */ 798 799 /* If we have enough space, find the first keyfield of each line in advance. 800 Make a `struct lineinfo' for each line, which records the keyfield 801 as well as the line, and sort them. */ 802 803 lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo)); 804 805 if (lineinfo) 806 { 807 size_t idx = 0; 808 struct lineinfo *lp; 809 char **p; 810 811 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) 812 { 813 lp->idx = idx++; 814 lp->text = *p; 815 lp->key.text = find_field (keyfields, *p, &lp->keylen); 816 if (keyfields->numeric) 817 lp->key.number = find_value (lp->key.text, lp->keylen); 818 } 819 820 qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), 821 compare_prepared); 822 823 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) 824 *p = lp->text; 825 826 free (lineinfo); 827 } 828 else 829 qsort (linearray, nextline - linearray, sizeof (char *), compare_full); 830 831 /* Open the output file. */ 832 833 if (outfile) 834 { 835 ostream = fopen (outfile, "w"); 836 if (!ostream) 837 pfatal_with_name (outfile); 838 } 839 840 writelines (linearray, nextline - linearray, ostream); 841 if (outfile) 842 fclose (ostream); 843 844 free (linearray); 845 free (data); 846 } 847 848 /* Parse an input string in core into lines. 849 DATA is the input string, and SIZE is its length. 850 Data goes in LINEARRAY starting at NEXTLINE. 851 The value returned is the first entry in LINEARRAY still unused. 852 Value 0 means input file contents are invalid. */ 853 854 char ** 855 parsefile (char *filename, char **nextline, char *data, long int size) 856 { 857 char *p, *end; 858 char **line = nextline; 859 860 p = data; 861 end = p + size; 862 *end = 0; 863 864 while (p != end) 865 { 866 if (p[0] != '\\' && p[0] != '@') 867 return 0; 868 869 *line = p; 870 871 /* Find the first letter of the first field of this line. If it 872 is different from the first letter of the first field of the 873 first line, we need initial headers in the output index. */ 874 while (*p && *p != '{') 875 p++; 876 if (p == end) 877 return 0; 878 p++; 879 if (first_initial) 880 { 881 if (first_initial != toupper (*p)) 882 need_initials = 1; 883 } 884 else 885 first_initial = toupper (*p); 886 887 while (*p && *p != '\n') 888 p++; 889 if (p != end) 890 p++; 891 892 line++; 893 if (line == linearray + nlines) 894 { 895 char **old = linearray; 896 linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4)); 897 line += linearray - old; 898 } 899 } 900 901 return line; 902 } 903 904 /* Indexification is a filter applied to the sorted lines 905 as they are being written to the output file. 906 Multiple entries for the same name, with different page numbers, 907 get combined into a single entry with multiple page numbers. 908 The first braced field, which is used for sorting, is discarded. 909 However, its first character is examined, folded to lower case, 910 and if it is different from that in the previous line fed to us 911 a \initial line is written with one argument, the new initial. 912 913 If an entry has four braced fields, then the second and third 914 constitute primary and secondary names. 915 In this case, each change of primary name 916 generates a \primary line which contains only the primary name, 917 and in between these are \secondary lines which contain 918 just a secondary name and page numbers. */ 919 920 /* The last primary name we wrote a \primary entry for. 921 If only one level of indexing is being done, this is the last name seen. */ 922 char *lastprimary; 923 /* Length of storage allocated for lastprimary. */ 924 int lastprimarylength; 925 926 /* Similar, for the secondary name. */ 927 char *lastsecondary; 928 int lastsecondarylength; 929 930 /* Zero if we are not in the middle of writing an entry. 931 One if we have written the beginning of an entry but have not 932 yet written any page numbers into it. 933 Greater than one if we have written the beginning of an entry 934 plus at least one page number. */ 935 int pending; 936 937 /* The initial (for sorting purposes) of the last primary entry written. 938 When this changes, a \initial {c} line is written */ 939 940 char *lastinitial; 941 942 int lastinitiallength; 943 944 /* When we need a string of length 1 for the value of lastinitial, 945 store it here. */ 946 947 char lastinitial1[2]; 948 949 /* Initialize static storage for writing an index. */ 950 951 void 952 init_index (void) 953 { 954 pending = 0; 955 lastinitial = lastinitial1; 956 lastinitial1[0] = 0; 957 lastinitial1[1] = 0; 958 lastinitiallength = 0; 959 lastprimarylength = 100; 960 lastprimary = (char *) xmalloc (lastprimarylength + 1); 961 memset (lastprimary, '\0', lastprimarylength + 1); 962 lastsecondarylength = 100; 963 lastsecondary = (char *) xmalloc (lastsecondarylength + 1); 964 memset (lastsecondary, '\0', lastsecondarylength + 1); 965 } 966 967 /* Indexify. Merge entries for the same name, 968 insert headers for each initial character, etc. */ 969 970 void 971 indexify (char *line, FILE *ostream) 972 { 973 char *primary, *secondary, *pagenumber; 974 int primarylength, secondarylength = 0, pagelength; 975 int nosecondary; 976 int initiallength; 977 char *initial; 978 char initial1[2]; 979 register char *p; 980 981 /* First, analyze the parts of the entry fed to us this time. */ 982 983 p = find_braced_pos (line, 0, 0, 0); 984 if (*p == '{') 985 { 986 initial = p; 987 /* Get length of inner pair of braces starting at `p', 988 including that inner pair of braces. */ 989 initiallength = find_braced_end (p + 1) + 1 - p; 990 } 991 else 992 { 993 initial = initial1; 994 initial1[0] = toupper (*p); 995 initial1[1] = 0; 996 initiallength = 1; 997 } 998 999 pagenumber = find_braced_pos (line, 1, 0, 0); 1000 pagelength = find_braced_end (pagenumber) - pagenumber; 1001 if (pagelength == 0) 1002 fatal (_("No page number in %s"), line); 1003 1004 primary = find_braced_pos (line, 2, 0, 0); 1005 primarylength = find_braced_end (primary) - primary; 1006 1007 secondary = find_braced_pos (line, 3, 0, 0); 1008 nosecondary = !*secondary; 1009 if (!nosecondary) 1010 secondarylength = find_braced_end (secondary) - secondary; 1011 1012 /* If the primary is different from before, make a new primary entry. */ 1013 if (strncmp (primary, lastprimary, primarylength)) 1014 { 1015 /* Close off current secondary entry first, if one is open. */ 1016 if (pending) 1017 { 1018 fputs ("}\n", ostream); 1019 pending = 0; 1020 } 1021 1022 /* If this primary has a different initial, include an entry for 1023 the initial. */ 1024 if (need_initials && 1025 (initiallength != lastinitiallength || 1026 strncmp (initial, lastinitial, initiallength))) 1027 { 1028 fprintf (ostream, "\\initial {"); 1029 fwrite (initial, 1, initiallength, ostream); 1030 fputs ("}\n", ostream); 1031 if (initial == initial1) 1032 { 1033 lastinitial = lastinitial1; 1034 *lastinitial1 = *initial1; 1035 } 1036 else 1037 { 1038 lastinitial = initial; 1039 } 1040 lastinitiallength = initiallength; 1041 } 1042 1043 /* Make the entry for the primary. */ 1044 if (nosecondary) 1045 fputs ("\\entry {", ostream); 1046 else 1047 fputs ("\\primary {", ostream); 1048 fwrite (primary, primarylength, 1, ostream); 1049 if (nosecondary) 1050 { 1051 fputs ("}{", ostream); 1052 pending = 1; 1053 } 1054 else 1055 fputs ("}\n", ostream); 1056 1057 /* Record name of most recent primary. */ 1058 if (lastprimarylength < primarylength) 1059 { 1060 lastprimarylength = primarylength + 100; 1061 lastprimary = (char *) xrealloc (lastprimary, 1062 1 + lastprimarylength); 1063 } 1064 strncpy (lastprimary, primary, primarylength); 1065 lastprimary[primarylength] = 0; 1066 1067 /* There is no current secondary within this primary, now. */ 1068 lastsecondary[0] = 0; 1069 } 1070 1071 /* Should not have an entry with no subtopic following one with a 1072 subtopic. */ 1073 1074 if (nosecondary && *lastsecondary) 1075 error (_("entry %s follows an entry with a secondary name"), line); 1076 1077 /* Start a new secondary entry if necessary. */ 1078 if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength)) 1079 { 1080 if (pending) 1081 { 1082 fputs ("}\n", ostream); 1083 pending = 0; 1084 } 1085 1086 /* Write the entry for the secondary. */ 1087 fputs ("\\secondary {", ostream); 1088 fwrite (secondary, secondarylength, 1, ostream); 1089 fputs ("}{", ostream); 1090 pending = 1; 1091 1092 /* Record name of most recent secondary. */ 1093 if (lastsecondarylength < secondarylength) 1094 { 1095 lastsecondarylength = secondarylength + 100; 1096 lastsecondary = (char *) xrealloc (lastsecondary, 1097 1 + lastsecondarylength); 1098 } 1099 strncpy (lastsecondary, secondary, secondarylength); 1100 lastsecondary[secondarylength] = 0; 1101 } 1102 1103 /* Here to add one more page number to the current entry. */ 1104 if (pending++ != 1) 1105 fputs (", ", ostream); /* Punctuate first, if this is not the first. */ 1106 fwrite (pagenumber, pagelength, 1, ostream); 1107 } 1108 1109 /* Close out any unfinished output entry. */ 1110 1111 void 1112 finish_index (FILE *ostream) 1113 { 1114 if (pending) 1115 fputs ("}\n", ostream); 1116 free (lastprimary); 1117 free (lastsecondary); 1118 } 1119 1120 /* Copy the lines in the sorted order. 1121 Each line is copied out of the input file it was found in. */ 1122 1123 void 1124 writelines (char **linearray, int nlines, FILE *ostream) 1125 { 1126 char **stop_line = linearray + nlines; 1127 char **next_line; 1128 1129 init_index (); 1130 1131 /* Output the text of the lines, and free the buffer space. */ 1132 1133 for (next_line = linearray; next_line != stop_line; next_line++) 1134 { 1135 /* Output the line only if distinct from previous one. */ 1136 if (next_line == linearray 1137 /* Compare previous line with this one, using only the 1138 explicitly specd keyfields. */ 1139 || compare_general (*(next_line - 1), *next_line, 0L, 0L, 1140 num_keyfields - 1)) 1141 { 1142 char *p = *next_line; 1143 char c; 1144 1145 while ((c = *p++) && c != '\n') 1146 /* Do nothing. */ ; 1147 *(p - 1) = 0; 1148 indexify (*next_line, ostream); 1149 } 1150 } 1151 1152 finish_index (ostream); 1153 } 1154 1155 /* Print error message and exit. */ 1156 1157 void 1158 fatal (const char *format, const char *arg) 1159 { 1160 error (format, arg); 1161 xexit (1); 1162 } 1163 1164 /* Print error message. FORMAT is printf control string, ARG is arg for it. */ 1165 void 1166 error (const char *format, const char *arg) 1167 { 1168 printf ("%s: ", program_name); 1169 printf (format, arg); 1170 if (format[strlen (format) -1] != '\n') 1171 printf ("\n"); 1172 } 1173 1174 void 1175 perror_with_name (const char *name) 1176 { 1177 fprintf (stderr, "%s: ", program_name); 1178 perror (name); 1179 } 1180 1181 void 1182 pfatal_with_name (const char *name) 1183 { 1184 perror_with_name (name); 1185 xexit (1); 1186 } 1187 1188 1189 /* Return a newly-allocated string concatenating S1, S2, and S3. */ 1190 1191 static char * 1192 concat3 (const char *s1, const char *s2, const char *s3) 1193 { 1194 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3); 1195 char *result = (char *) xmalloc (len1 + len2 + len3 + 1); 1196 1197 strcpy (result, s1); 1198 strcpy (result + len1, s2); 1199 strcpy (result + len1 + len2, s3); 1200 *(result + len1 + len2 + len3) = 0; 1201 1202 return result; 1203 } 1204