1 /* $NetBSD: texindex.c,v 1.3 2024/04/07 12:30:38 christos Exp $ */
2
3 /* texindex -- sort TeX index dribble output into an actual index.
4 Id: texindex.c,v 1.11 2004/04/11 17:56:47 karl Exp
5
6 Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001,
7 2002, 2003, 2004 Free Software Foundation, Inc.
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
22
23 #include "system.h"
24 #include <getopt.h>
25
26 static char *program_name = "texindex";
27
28 #if defined (emacs)
29 # include "../src/config.h"
30 /* Some s/os.h files redefine these. */
31 # undef read
32 # undef close
33 # undef write
34 # undef open
35 #endif
36
37 #if !defined (HAVE_MEMSET)
38 #undef memset
39 #define memset(ptr, ignore, count) bzero (ptr, count)
40 #endif
41
42 #if !defined (SEEK_SET)
43 # define SEEK_SET 0
44 # define SEEK_CUR 1
45 # define SEEK_END 2
46 #endif /* !SEEK_SET */
47
48 /* When sorting in core, this structure describes one line
49 and the position and length of its first keyfield. */
50 struct lineinfo
51 {
52 char *text; /* The actual text of the line. */
53 union {
54 char *text; /* The start of the key (for textual comparison). */
55 long number; /* The numeric value (for numeric comparison). */
56 } key;
57 long keylen; /* Length of KEY field. */
58 size_t idx; /* tie breaker */
59 };
60
61 /* This structure describes a field to use as a sort key. */
62 struct keyfield
63 {
64 int startwords; /* Number of words to skip. */
65 int startchars; /* Number of additional chars to skip. */
66 int endwords; /* Number of words to ignore at end. */
67 int endchars; /* Ditto for characters of last word. */
68 char ignore_blanks; /* Non-zero means ignore spaces and tabs. */
69 char fold_case; /* Non-zero means case doesn't matter. */
70 char reverse; /* Non-zero means compare in reverse order. */
71 char numeric; /* Non-zeros means field is ASCII numeric. */
72 char positional; /* Sort according to file position. */
73 char braced; /* Count balanced-braced groupings as fields. */
74 };
75
76 /* Vector of keyfields to use. */
77 struct keyfield keyfields[3];
78
79 /* Number of keyfields stored in that vector. */
80 int num_keyfields = 3;
81
82 /* Vector of input file names, terminated with a null pointer. */
83 char **infiles;
84
85 /* Vector of corresponding output file names, or NULL, meaning default it
86 (add an `s' to the end). */
87 char **outfiles;
88
89 /* Length of `infiles'. */
90 int num_infiles;
91
92 /* Pointer to the array of pointers to lines being sorted. */
93 char **linearray;
94
95 /* The allocated length of `linearray'. */
96 long nlines;
97
98 /* During in-core sort, this points to the base of the data block
99 which contains all the lines of data. */
100 char *text_base;
101
102 /* Initially 0; changed to 1 if we want initials in this index. */
103 int need_initials;
104
105 /* Remembers the first initial letter seen in this index, so we can
106 determine whether we need initials in the sorted form. */
107 char first_initial;
108
109 /* Forward declarations of functions in this file. */
110 void decode_command (int argc, char **argv);
111 void sort_in_core (char *infile, int total, char *outfile);
112 char **parsefile (char *filename, char **nextline, char *data, long int size);
113 char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr);
114 char *find_pos (char *str, int words, int chars, int ignore_blanks);
115 long find_value (char *start, long int length);
116 char *find_braced_pos (char *str, int words, int chars, int ignore_blanks);
117 char *find_braced_end (char *str);
118 void writelines (char **linearray, int nlines, FILE *ostream);
119 int compare_field (struct keyfield *keyfield, char *start1,
120 long int length1, long int pos1, char *start2,
121 long int length2, long int pos2);
122 int compare_full (const void *, const void *);
123 void pfatal_with_name (const char *name);
124 void fatal (const char *format, const char *arg);
125 void error (const char *format, const char *arg);
126 void *xmalloc (), *xrealloc ();
127 static char *concat3 (const char *, const char *, const char *);
128
129 int
main(int argc,char ** argv)130 main (int argc, char **argv)
131 {
132 int i;
133
134 #ifdef HAVE_SETLOCALE
135 /* Set locale via LC_ALL. */
136 setlocale (LC_ALL, "");
137 #endif
138
139 /* Set the text message domain. */
140 bindtextdomain (PACKAGE, LOCALEDIR);
141 textdomain (PACKAGE);
142
143 /* In case we write to a redirected stdout that fails. */
144 /* not ready atexit (close_stdout); */
145
146 /* Describe the kind of sorting to do. */
147 /* The first keyfield uses the first braced field and folds case. */
148 keyfields[0].braced = 1;
149 keyfields[0].fold_case = 1;
150 keyfields[0].endwords = -1;
151 keyfields[0].endchars = -1;
152
153 /* The second keyfield uses the second braced field, numerically. */
154 keyfields[1].braced = 1;
155 keyfields[1].numeric = 1;
156 keyfields[1].startwords = 1;
157 keyfields[1].endwords = -1;
158 keyfields[1].endchars = -1;
159
160 /* The third keyfield (which is ignored while discarding duplicates)
161 compares the whole line. */
162 keyfields[2].endwords = -1;
163 keyfields[2].endchars = -1;
164
165 decode_command (argc, argv);
166
167 /* Process input files completely, one by one. */
168
169 for (i = 0; i < num_infiles; i++)
170 {
171 int desc;
172 off_t ptr;
173 char *outfile;
174 struct stat instat;
175
176 desc = open (infiles[i], O_RDONLY, 0);
177 if (desc < 0)
178 pfatal_with_name (infiles[i]);
179
180 if (stat (infiles[i], &instat))
181 pfatal_with_name (infiles[i]);
182 if (S_ISDIR (instat.st_mode))
183 {
184 #ifdef EISDIR
185 errno = EISDIR;
186 #endif
187 pfatal_with_name (infiles[i]);
188 }
189
190 lseek (desc, (off_t) 0, SEEK_END);
191 ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR);
192
193 close (desc);
194
195 outfile = outfiles[i];
196 if (!outfile)
197 outfile = concat3 (infiles[i], "s", "");
198
199 need_initials = 0;
200 first_initial = '\0';
201
202 if (ptr != (int)ptr)
203 {
204 fprintf (stderr, "%s: %s: file too large\n", program_name,
205 infiles[i]);
206 xexit (1);
207 }
208 sort_in_core (infiles[i], (int)ptr, outfile);
209 }
210
211 xexit (0);
212 return 0; /* Avoid bogus warnings. */
213 }
214
215 typedef struct
216 {
217 char *long_name;
218 char *short_name;
219 int *variable_ref;
220 int variable_value;
221 char *arg_name;
222 char *doc_string;
223 } TEXINDEX_OPTION;
224
225 TEXINDEX_OPTION texindex_options[] = {
226 { "--help", "-h", (int *)NULL, 0, (char *)NULL,
227 N_("display this help and exit") },
228 { "--output", "-o", (int *)NULL, 0, "FILE",
229 N_("send output to FILE") },
230 { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
231 N_("display version information and exit") },
232 { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
233 };
234
235 void
usage(int result_value)236 usage (int result_value)
237 {
238 register int i;
239 FILE *f = result_value ? stderr : stdout;
240
241 fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name);
242 fprintf (f, _("Generate a sorted index for each TeX output FILE.\n"));
243 /* Avoid trigraph nonsense. */
244 fprintf (f,
245 _("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"),
246 '?', '?'); /* avoid trigraph in cat-id-tbl.c */
247 fprintf (f, _("\nOptions:\n"));
248
249 for (i = 0; texindex_options[i].long_name; i++)
250 {
251 putc (' ', f);
252
253 if (texindex_options[i].short_name)
254 fprintf (f, "%s, ", texindex_options[i].short_name);
255
256 fprintf (f, "%s %s",
257 texindex_options[i].long_name,
258 texindex_options[i].arg_name
259 ? texindex_options[i].arg_name : "");
260
261 fprintf (f, "\t%s\n", _(texindex_options[i].doc_string));
262 }
263 fputs (_("\n\
264 Email bug reports to bug-texinfo@gnu.org,\n\
265 general questions and discussion to help-texinfo@gnu.org.\n\
266 Texinfo home page: http://www.gnu.org/software/texinfo/"), f);
267 fputs ("\n", f);
268
269 xexit (result_value);
270 }
271
272 /* Decode the command line arguments to set the parameter variables
273 and set up the vector of keyfields and the vector of input files. */
274
275 void
decode_command(int argc,char ** argv)276 decode_command (int argc, char **argv)
277 {
278 int arg_index = 1;
279 char **ip;
280 char **op;
281
282 /* Allocate ARGC input files, which must be enough. */
283
284 infiles = (char **) xmalloc (argc * sizeof (char *));
285 outfiles = (char **) xmalloc (argc * sizeof (char *));
286 ip = infiles;
287 op = outfiles;
288
289 while (arg_index < argc)
290 {
291 char *arg = argv[arg_index++];
292
293 if (*arg == '-')
294 {
295 if (strcmp (arg, "--version") == 0)
296 {
297 printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION);
298 puts ("");
299 puts ("Copyright (C) 2004 Free Software Foundation, Inc.");
300 printf (_("There is NO warranty. You may redistribute this software\n\
301 under the terms of the GNU General Public License.\n\
302 For more information about these matters, see the files named COPYING.\n"));
303 xexit (0);
304 }
305 else if ((strcmp (arg, "--keep") == 0) ||
306 (strcmp (arg, "-k") == 0))
307 {
308 /* Ignore, for backward compatibility */
309 }
310 else if ((strcmp (arg, "--help") == 0) ||
311 (strcmp (arg, "-h") == 0))
312 {
313 usage (0);
314 }
315 else if ((strcmp (arg, "--output") == 0) ||
316 (strcmp (arg, "-o") == 0))
317 {
318 if (argv[arg_index] != (char *)NULL)
319 {
320 arg_index++;
321 if (op > outfiles)
322 *(op - 1) = argv[arg_index];
323 }
324 else
325 usage (1);
326 }
327 else
328 usage (1);
329 }
330 else
331 {
332 *ip++ = arg;
333 *op++ = (char *)NULL;
334 }
335 }
336
337 /* Record number of keyfields and terminate list of filenames. */
338 num_infiles = ip - infiles;
339 *ip = (char *)NULL;
340 if (num_infiles == 0)
341 usage (1);
342 }
343
344 /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
345
346 int
compare_full(const void * p1,const void * p2)347 compare_full (const void *p1, const void *p2)
348 {
349 char **line1 = (char **) p1;
350 char **line2 = (char **) p2;
351 int i;
352
353 /* Compare using the first keyfield;
354 if that does not distinguish the lines, try the second keyfield;
355 and so on. */
356
357 for (i = 0; i < num_keyfields; i++)
358 {
359 long length1, length2;
360 char *start1 = find_field (&keyfields[i], *line1, &length1);
361 char *start2 = find_field (&keyfields[i], *line2, &length2);
362 int tem = compare_field (&keyfields[i], start1, length1,
363 *line1 - text_base,
364 start2, length2, *line2 - text_base);
365 if (tem)
366 {
367 if (keyfields[i].reverse)
368 return -tem;
369 return tem;
370 }
371 }
372
373 if (*line1 == *line2)
374 abort ();
375 return *line1 < *line2 ? -1 : 1;
376 }
377
378 /* Compare LINE1 and LINE2, described by structures
379 in which the first keyfield is identified in advance.
380 For positional sorting, assumes that the order of the lines in core
381 reflects their nominal order. */
382 int
compare_prepared(const void * p1,const void * p2)383 compare_prepared (const void *p1, const void *p2)
384 {
385 struct lineinfo *line1 = (struct lineinfo *) p1;
386 struct lineinfo *line2 = (struct lineinfo *) p2;
387 int i;
388 int tem;
389 char *text1, *text2;
390
391 /* Compare using the first keyfield, which has been found for us already. */
392 if (keyfields->positional)
393 {
394 if (line1->text - text_base > line2->text - text_base)
395 tem = 1;
396 else
397 tem = -1;
398 }
399 else if (keyfields->numeric)
400 tem = line1->key.number - line2->key.number;
401 else
402 tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
403 line2->key.text, line2->keylen, 0);
404 if (tem)
405 {
406 if (keyfields->reverse)
407 return -tem;
408 return tem;
409 }
410
411 text1 = line1->text;
412 text2 = line2->text;
413
414 /* Compare using the second keyfield;
415 if that does not distinguish the lines, try the third keyfield;
416 and so on. */
417
418 for (i = 1; i < num_keyfields; i++)
419 {
420 long length1, length2;
421 char *start1 = find_field (&keyfields[i], text1, &length1);
422 char *start2 = find_field (&keyfields[i], text2, &length2);
423 int tem = compare_field (&keyfields[i], start1, length1,
424 text1 - text_base,
425 start2, length2, text2 - text_base);
426 if (tem)
427 {
428 if (keyfields[i].reverse)
429 return -tem;
430 return tem;
431 }
432 }
433
434 if (line1->idx == line2->idx)
435 abort ();
436 return line1->idx < line2->idx ? -1 : 1;
437 }
438
439 /* Like compare_full but more general.
440 You can pass any strings, and you can say how many keyfields to use.
441 POS1 and POS2 should indicate the nominal positional ordering of
442 the two lines in the input. */
443
444 int
compare_general(char * str1,char * str2,long int pos1,long int pos2,int use_keyfields)445 compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields)
446 {
447 int i;
448
449 /* Compare using the first keyfield;
450 if that does not distinguish the lines, try the second keyfield;
451 and so on. */
452
453 for (i = 0; i < use_keyfields; i++)
454 {
455 long length1, length2;
456 char *start1 = find_field (&keyfields[i], str1, &length1);
457 char *start2 = find_field (&keyfields[i], str2, &length2);
458 int tem = compare_field (&keyfields[i], start1, length1, pos1,
459 start2, length2, pos2);
460 if (tem)
461 {
462 if (keyfields[i].reverse)
463 return -tem;
464 return tem;
465 }
466 }
467
468 return 0; /* Lines match exactly. */
469 }
470
471 /* Find the start and length of a field in STR according to KEYFIELD.
472 A pointer to the starting character is returned, and the length
473 is stored into the int that LENGTHPTR points to. */
474
475 char *
find_field(struct keyfield * keyfield,char * str,long int * lengthptr)476 find_field (struct keyfield *keyfield, char *str, long int *lengthptr)
477 {
478 char *start;
479 char *end;
480 char *(*fun) ();
481
482 if (keyfield->braced)
483 fun = find_braced_pos;
484 else
485 fun = find_pos;
486
487 start = (*fun) (str, keyfield->startwords, keyfield->startchars,
488 keyfield->ignore_blanks);
489 if (keyfield->endwords < 0)
490 {
491 if (keyfield->braced)
492 end = find_braced_end (start);
493 else
494 {
495 end = start;
496 while (*end && *end != '\n')
497 end++;
498 }
499 }
500 else
501 {
502 end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
503 if (end - str < start - str)
504 end = start;
505 }
506 *lengthptr = end - start;
507 return start;
508 }
509
510 /* Return a pointer to a specified place within STR,
511 skipping (from the beginning) WORDS words and then CHARS chars.
512 If IGNORE_BLANKS is nonzero, we skip all blanks
513 after finding the specified word. */
514
515 char *
find_pos(char * str,int words,int chars,int ignore_blanks)516 find_pos (char *str, int words, int chars, int ignore_blanks)
517 {
518 int i;
519 char *p = str;
520
521 for (i = 0; i < words; i++)
522 {
523 char c;
524 /* Find next bunch of nonblanks and skip them. */
525 while ((c = *p) == ' ' || c == '\t')
526 p++;
527 while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
528 p++;
529 if (!*p || *p == '\n')
530 return p;
531 }
532
533 while (*p == ' ' || *p == '\t')
534 p++;
535
536 for (i = 0; i < chars; i++)
537 {
538 if (!*p || *p == '\n')
539 break;
540 p++;
541 }
542 return p;
543 }
544
545 /* Like find_pos but assumes that each field is surrounded by braces
546 and that braces within fields are balanced. */
547
548 char *
find_braced_pos(char * str,int words,int chars,int ignore_blanks)549 find_braced_pos (char *str, int words, int chars, int ignore_blanks)
550 {
551 int i;
552 int bracelevel;
553 char *p = str;
554 char c;
555
556 for (i = 0; i < words; i++)
557 {
558 bracelevel = 1;
559 while ((c = *p++) != '{' && c != '\n' && c)
560 /* Do nothing. */ ;
561 if (c != '{')
562 return p - 1;
563 while (bracelevel)
564 {
565 c = *p++;
566 if (c == '{')
567 bracelevel++;
568 if (c == '}')
569 bracelevel--;
570 if (c == 0 || c == '\n')
571 return p - 1;
572 }
573 }
574
575 while ((c = *p++) != '{' && c != '\n' && c)
576 /* Do nothing. */ ;
577
578 if (c != '{')
579 return p - 1;
580
581 if (ignore_blanks)
582 while ((c = *p) == ' ' || c == '\t')
583 p++;
584
585 for (i = 0; i < chars; i++)
586 {
587 if (!*p || *p == '\n')
588 break;
589 p++;
590 }
591 return p;
592 }
593
594 /* Find the end of the balanced-brace field which starts at STR.
595 The position returned is just before the closing brace. */
596
597 char *
find_braced_end(char * str)598 find_braced_end (char *str)
599 {
600 int bracelevel;
601 char *p = str;
602 char c;
603
604 bracelevel = 1;
605 while (bracelevel)
606 {
607 c = *p++;
608 if (c == '{')
609 bracelevel++;
610 if (c == '}')
611 bracelevel--;
612 if (c == 0 || c == '\n')
613 return p - 1;
614 }
615 return p - 1;
616 }
617
618 long
find_value(char * start,long int length)619 find_value (char *start, long int length)
620 {
621 while (length != 0L)
622 {
623 if (isdigit (*start))
624 return atol (start);
625 length--;
626 start++;
627 }
628 return 0l;
629 }
630
631 /* Vector used to translate characters for comparison.
632 This is how we make all alphanumerics follow all else,
633 and ignore case in the first sorting. */
634 int char_order[256];
635
636 void
init_char_order(void)637 init_char_order (void)
638 {
639 int i;
640 for (i = 1; i < 256; i++)
641 char_order[i] = i;
642
643 for (i = '0'; i <= '9'; i++)
644 char_order[i] += 512;
645
646 for (i = 'a'; i <= 'z'; i++)
647 {
648 char_order[i] = 512 + i;
649 char_order[i + 'A' - 'a'] = 512 + i;
650 }
651 }
652
653 /* Compare two fields (each specified as a start pointer and a character count)
654 according to KEYFIELD.
655 The sign of the value reports the relation between the fields. */
656
657 int
compare_field(struct keyfield * keyfield,char * start1,long int length1,long int pos1,char * start2,long int length2,long int pos2)658 compare_field (struct keyfield *keyfield, char *start1, long int length1,
659 long int pos1, char *start2, long int length2, long int pos2)
660 {
661 if (keyfields->positional)
662 {
663 if (pos1 > pos2)
664 return 1;
665 else
666 return -1;
667 }
668 if (keyfield->numeric)
669 {
670 long value = find_value (start1, length1) - find_value (start2, length2);
671 if (value > 0)
672 return 1;
673 if (value < 0)
674 return -1;
675 return 0;
676 }
677 else
678 {
679 char *p1 = start1;
680 char *p2 = start2;
681 char *e1 = start1 + length1;
682 char *e2 = start2 + length2;
683
684 while (1)
685 {
686 int c1, c2;
687
688 if (p1 == e1)
689 c1 = 0;
690 else
691 c1 = *p1++;
692 if (p2 == e2)
693 c2 = 0;
694 else
695 c2 = *p2++;
696
697 if (char_order[c1] != char_order[c2])
698 return char_order[c1] - char_order[c2];
699 if (!c1)
700 break;
701 }
702
703 /* Strings are equal except possibly for case. */
704 p1 = start1;
705 p2 = start2;
706 while (1)
707 {
708 int c1, c2;
709
710 if (p1 == e1)
711 c1 = 0;
712 else
713 c1 = *p1++;
714 if (p2 == e2)
715 c2 = 0;
716 else
717 c2 = *p2++;
718
719 if (c1 != c2)
720 /* Reverse sign here so upper case comes out last. */
721 return c2 - c1;
722 if (!c1)
723 break;
724 }
725
726 return 0;
727 }
728 }
729
730 /* Sort INFILE, whose size is TOTAL,
731 assuming that is small enough to be done in-core,
732 then indexify it and send the output to OUTFILE (or to stdout). */
733
734 void
sort_in_core(char * infile,int total,char * outfile)735 sort_in_core (char *infile, int total, char *outfile)
736 {
737 char **nextline;
738 char *data = (char *) xmalloc (total + 1);
739 char *file_data;
740 long file_size;
741 int i;
742 FILE *ostream = stdout;
743 struct lineinfo *lineinfo;
744
745 /* Read the contents of the file into the moby array `data'. */
746
747 int desc = open (infile, O_RDONLY, 0);
748
749 if (desc < 0)
750 fatal (_("failure reopening %s"), infile);
751 for (file_size = 0;;)
752 {
753 i = read (desc, data + file_size, total - file_size);
754 if (i <= 0)
755 break;
756 file_size += i;
757 }
758 file_data = data;
759 data[file_size] = 0;
760
761 close (desc);
762
763 if (file_size > 0 && data[0] != '\\' && data[0] != '@')
764 {
765 error (_("%s: not a texinfo index file"), infile);
766 return;
767 }
768
769 init_char_order ();
770
771 /* Sort routines want to know this address. */
772
773 text_base = data;
774
775 /* Create the array of pointers to lines, with a default size
776 frequently enough. */
777
778 nlines = total / 50;
779 if (!nlines)
780 nlines = 2;
781 linearray = (char **) xmalloc (nlines * sizeof (char *));
782
783 /* `nextline' points to the next free slot in this array.
784 `nlines' is the allocated size. */
785
786 nextline = linearray;
787
788 /* Parse the input file's data, and make entries for the lines. */
789
790 nextline = parsefile (infile, nextline, file_data, file_size);
791 if (nextline == 0)
792 {
793 error (_("%s: not a texinfo index file"), infile);
794 return;
795 }
796
797 /* Sort the lines. */
798
799 /* If we have enough space, find the first keyfield of each line in advance.
800 Make a `struct lineinfo' for each line, which records the keyfield
801 as well as the line, and sort them. */
802
803 lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo));
804
805 if (lineinfo)
806 {
807 size_t idx = 0;
808 struct lineinfo *lp;
809 char **p;
810
811 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
812 {
813 lp->idx = idx++;
814 lp->text = *p;
815 lp->key.text = find_field (keyfields, *p, &lp->keylen);
816 if (keyfields->numeric)
817 lp->key.number = find_value (lp->key.text, lp->keylen);
818 }
819
820 qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
821 compare_prepared);
822
823 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
824 *p = lp->text;
825
826 free (lineinfo);
827 }
828 else
829 qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
830
831 /* Open the output file. */
832
833 if (outfile)
834 {
835 ostream = fopen (outfile, "w");
836 if (!ostream)
837 pfatal_with_name (outfile);
838 }
839
840 writelines (linearray, nextline - linearray, ostream);
841 if (outfile)
842 fclose (ostream);
843
844 free (linearray);
845 free (data);
846 }
847
848 /* Parse an input string in core into lines.
849 DATA is the input string, and SIZE is its length.
850 Data goes in LINEARRAY starting at NEXTLINE.
851 The value returned is the first entry in LINEARRAY still unused.
852 Value 0 means input file contents are invalid. */
853
854 char **
parsefile(char * filename,char ** nextline,char * data,long int size)855 parsefile (char *filename, char **nextline, char *data, long int size)
856 {
857 char *p, *end;
858 char **line = nextline;
859
860 p = data;
861 end = p + size;
862 *end = 0;
863
864 while (p != end)
865 {
866 if (p[0] != '\\' && p[0] != '@')
867 return 0;
868
869 *line = p;
870
871 /* Find the first letter of the first field of this line. If it
872 is different from the first letter of the first field of the
873 first line, we need initial headers in the output index. */
874 while (*p && *p != '{')
875 p++;
876 if (p == end)
877 return 0;
878 p++;
879 if (first_initial)
880 {
881 if (first_initial != toupper (*p))
882 need_initials = 1;
883 }
884 else
885 first_initial = toupper (*p);
886
887 while (*p && *p != '\n')
888 p++;
889 if (p != end)
890 p++;
891
892 line++;
893 if (line == linearray + nlines)
894 {
895 char **old = linearray;
896 linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4));
897 line += linearray - old;
898 }
899 }
900
901 return line;
902 }
903
904 /* Indexification is a filter applied to the sorted lines
905 as they are being written to the output file.
906 Multiple entries for the same name, with different page numbers,
907 get combined into a single entry with multiple page numbers.
908 The first braced field, which is used for sorting, is discarded.
909 However, its first character is examined, folded to lower case,
910 and if it is different from that in the previous line fed to us
911 a \initial line is written with one argument, the new initial.
912
913 If an entry has four braced fields, then the second and third
914 constitute primary and secondary names.
915 In this case, each change of primary name
916 generates a \primary line which contains only the primary name,
917 and in between these are \secondary lines which contain
918 just a secondary name and page numbers. */
919
920 /* The last primary name we wrote a \primary entry for.
921 If only one level of indexing is being done, this is the last name seen. */
922 char *lastprimary;
923 /* Length of storage allocated for lastprimary. */
924 int lastprimarylength;
925
926 /* Similar, for the secondary name. */
927 char *lastsecondary;
928 int lastsecondarylength;
929
930 /* Zero if we are not in the middle of writing an entry.
931 One if we have written the beginning of an entry but have not
932 yet written any page numbers into it.
933 Greater than one if we have written the beginning of an entry
934 plus at least one page number. */
935 int pending;
936
937 /* The initial (for sorting purposes) of the last primary entry written.
938 When this changes, a \initial {c} line is written */
939
940 char *lastinitial;
941
942 int lastinitiallength;
943
944 /* When we need a string of length 1 for the value of lastinitial,
945 store it here. */
946
947 char lastinitial1[2];
948
949 /* Initialize static storage for writing an index. */
950
951 void
init_index(void)952 init_index (void)
953 {
954 pending = 0;
955 lastinitial = lastinitial1;
956 lastinitial1[0] = 0;
957 lastinitial1[1] = 0;
958 lastinitiallength = 0;
959 lastprimarylength = 100;
960 lastprimary = (char *) xmalloc (lastprimarylength + 1);
961 memset (lastprimary, '\0', lastprimarylength + 1);
962 lastsecondarylength = 100;
963 lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
964 memset (lastsecondary, '\0', lastsecondarylength + 1);
965 }
966
967 /* Indexify. Merge entries for the same name,
968 insert headers for each initial character, etc. */
969
970 void
indexify(char * line,FILE * ostream)971 indexify (char *line, FILE *ostream)
972 {
973 char *primary, *secondary, *pagenumber;
974 int primarylength, secondarylength = 0, pagelength;
975 int nosecondary;
976 int initiallength;
977 char *initial;
978 char initial1[2];
979 register char *p;
980
981 /* First, analyze the parts of the entry fed to us this time. */
982
983 p = find_braced_pos (line, 0, 0, 0);
984 if (*p == '{')
985 {
986 initial = p;
987 /* Get length of inner pair of braces starting at `p',
988 including that inner pair of braces. */
989 initiallength = find_braced_end (p + 1) + 1 - p;
990 }
991 else
992 {
993 initial = initial1;
994 initial1[0] = toupper (*p);
995 initial1[1] = 0;
996 initiallength = 1;
997 }
998
999 pagenumber = find_braced_pos (line, 1, 0, 0);
1000 pagelength = find_braced_end (pagenumber) - pagenumber;
1001 if (pagelength == 0)
1002 fatal (_("No page number in %s"), line);
1003
1004 primary = find_braced_pos (line, 2, 0, 0);
1005 primarylength = find_braced_end (primary) - primary;
1006
1007 secondary = find_braced_pos (line, 3, 0, 0);
1008 nosecondary = !*secondary;
1009 if (!nosecondary)
1010 secondarylength = find_braced_end (secondary) - secondary;
1011
1012 /* If the primary is different from before, make a new primary entry. */
1013 if (strncmp (primary, lastprimary, primarylength))
1014 {
1015 /* Close off current secondary entry first, if one is open. */
1016 if (pending)
1017 {
1018 fputs ("}\n", ostream);
1019 pending = 0;
1020 }
1021
1022 /* If this primary has a different initial, include an entry for
1023 the initial. */
1024 if (need_initials &&
1025 (initiallength != lastinitiallength ||
1026 strncmp (initial, lastinitial, initiallength)))
1027 {
1028 fprintf (ostream, "\\initial {");
1029 fwrite (initial, 1, initiallength, ostream);
1030 fputs ("}\n", ostream);
1031 if (initial == initial1)
1032 {
1033 lastinitial = lastinitial1;
1034 *lastinitial1 = *initial1;
1035 }
1036 else
1037 {
1038 lastinitial = initial;
1039 }
1040 lastinitiallength = initiallength;
1041 }
1042
1043 /* Make the entry for the primary. */
1044 if (nosecondary)
1045 fputs ("\\entry {", ostream);
1046 else
1047 fputs ("\\primary {", ostream);
1048 fwrite (primary, primarylength, 1, ostream);
1049 if (nosecondary)
1050 {
1051 fputs ("}{", ostream);
1052 pending = 1;
1053 }
1054 else
1055 fputs ("}\n", ostream);
1056
1057 /* Record name of most recent primary. */
1058 if (lastprimarylength < primarylength)
1059 {
1060 lastprimarylength = primarylength + 100;
1061 lastprimary = (char *) xrealloc (lastprimary,
1062 1 + lastprimarylength);
1063 }
1064 strncpy (lastprimary, primary, primarylength);
1065 lastprimary[primarylength] = 0;
1066
1067 /* There is no current secondary within this primary, now. */
1068 lastsecondary[0] = 0;
1069 }
1070
1071 /* Should not have an entry with no subtopic following one with a
1072 subtopic. */
1073
1074 if (nosecondary && *lastsecondary)
1075 error (_("entry %s follows an entry with a secondary name"), line);
1076
1077 /* Start a new secondary entry if necessary. */
1078 if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
1079 {
1080 if (pending)
1081 {
1082 fputs ("}\n", ostream);
1083 pending = 0;
1084 }
1085
1086 /* Write the entry for the secondary. */
1087 fputs ("\\secondary {", ostream);
1088 fwrite (secondary, secondarylength, 1, ostream);
1089 fputs ("}{", ostream);
1090 pending = 1;
1091
1092 /* Record name of most recent secondary. */
1093 if (lastsecondarylength < secondarylength)
1094 {
1095 lastsecondarylength = secondarylength + 100;
1096 lastsecondary = (char *) xrealloc (lastsecondary,
1097 1 + lastsecondarylength);
1098 }
1099 strncpy (lastsecondary, secondary, secondarylength);
1100 lastsecondary[secondarylength] = 0;
1101 }
1102
1103 /* Here to add one more page number to the current entry. */
1104 if (pending++ != 1)
1105 fputs (", ", ostream); /* Punctuate first, if this is not the first. */
1106 fwrite (pagenumber, pagelength, 1, ostream);
1107 }
1108
1109 /* Close out any unfinished output entry. */
1110
1111 void
finish_index(FILE * ostream)1112 finish_index (FILE *ostream)
1113 {
1114 if (pending)
1115 fputs ("}\n", ostream);
1116 free (lastprimary);
1117 free (lastsecondary);
1118 }
1119
1120 /* Copy the lines in the sorted order.
1121 Each line is copied out of the input file it was found in. */
1122
1123 void
writelines(char ** linearray,int nlines,FILE * ostream)1124 writelines (char **linearray, int nlines, FILE *ostream)
1125 {
1126 char **stop_line = linearray + nlines;
1127 char **next_line;
1128
1129 init_index ();
1130
1131 /* Output the text of the lines, and free the buffer space. */
1132
1133 for (next_line = linearray; next_line != stop_line; next_line++)
1134 {
1135 /* Output the line only if distinct from previous one. */
1136 if (next_line == linearray
1137 /* Compare previous line with this one, using only the
1138 explicitly specd keyfields. */
1139 || compare_general (*(next_line - 1), *next_line, 0L, 0L,
1140 num_keyfields - 1))
1141 {
1142 char *p = *next_line;
1143 char c;
1144
1145 while ((c = *p++) && c != '\n')
1146 /* Do nothing. */ ;
1147 *(p - 1) = 0;
1148 indexify (*next_line, ostream);
1149 }
1150 }
1151
1152 finish_index (ostream);
1153 }
1154
1155 /* Print error message and exit. */
1156
1157 void
fatal(const char * format,const char * arg)1158 fatal (const char *format, const char *arg)
1159 {
1160 error (format, arg);
1161 xexit (1);
1162 }
1163
1164 /* Print error message. FORMAT is printf control string, ARG is arg for it. */
1165 void
error(const char * format,const char * arg)1166 error (const char *format, const char *arg)
1167 {
1168 printf ("%s: ", program_name);
1169 printf (format, arg);
1170 if (format[strlen (format) -1] != '\n')
1171 printf ("\n");
1172 }
1173
1174 void
perror_with_name(const char * name)1175 perror_with_name (const char *name)
1176 {
1177 fprintf (stderr, "%s: ", program_name);
1178 perror (name);
1179 }
1180
1181 void
pfatal_with_name(const char * name)1182 pfatal_with_name (const char *name)
1183 {
1184 perror_with_name (name);
1185 xexit (1);
1186 }
1187
1188
1189 /* Return a newly-allocated string concatenating S1, S2, and S3. */
1190
1191 static char *
concat3(const char * s1,const char * s2,const char * s3)1192 concat3 (const char *s1, const char *s2, const char *s3)
1193 {
1194 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
1195 char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
1196
1197 strcpy (result, s1);
1198 strcpy (result + len1, s2);
1199 strcpy (result + len1 + len2, s3);
1200 *(result + len1 + len2 + len3) = 0;
1201
1202 return result;
1203 }
1204