xref: /netbsd-src/external/gpl2/texinfo/dist/util/texindex.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	$NetBSD: texindex.c,v 1.2 2016/01/14 00:34:53 christos Exp $	*/
2 
3 /* texindex -- sort TeX index dribble output into an actual index.
4    Id: texindex.c,v 1.11 2004/04/11 17:56:47 karl Exp
5 
6    Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001,
7    2002, 2003, 2004 Free Software Foundation, Inc.
8 
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 2, or (at your option)
12    any later version.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
22 
23 #include "system.h"
24 #include <getopt.h>
25 
26 static char *program_name = "texindex";
27 
28 #if defined (emacs)
29 #  include "../src/config.h"
30 /* Some s/os.h files redefine these. */
31 #  undef read
32 #  undef close
33 #  undef write
34 #  undef open
35 #endif
36 
37 #if !defined (HAVE_MEMSET)
38 #undef memset
39 #define memset(ptr, ignore, count) bzero (ptr, count)
40 #endif
41 
42 #if !defined (SEEK_SET)
43 #  define SEEK_SET 0
44 #  define SEEK_CUR 1
45 #  define SEEK_END 2
46 #endif /* !SEEK_SET */
47 
48 /* When sorting in core, this structure describes one line
49    and the position and length of its first keyfield.  */
50 struct lineinfo
51 {
52   char *text;           /* The actual text of the line. */
53   union {
54     char *text;         /* The start of the key (for textual comparison). */
55     long number;        /* The numeric value (for numeric comparison). */
56   } key;
57   long keylen;          /* Length of KEY field. */
58 };
59 
60 /* This structure describes a field to use as a sort key. */
61 struct keyfield
62 {
63   int startwords;       /* Number of words to skip. */
64   int startchars;       /* Number of additional chars to skip. */
65   int endwords;         /* Number of words to ignore at end. */
66   int endchars;         /* Ditto for characters of last word. */
67   char ignore_blanks;   /* Non-zero means ignore spaces and tabs. */
68   char fold_case;       /* Non-zero means case doesn't matter. */
69   char reverse;         /* Non-zero means compare in reverse order. */
70   char numeric;         /* Non-zeros means field is ASCII numeric. */
71   char positional;      /* Sort according to file position. */
72   char braced;          /* Count balanced-braced groupings as fields. */
73 };
74 
75 /* Vector of keyfields to use. */
76 struct keyfield keyfields[3];
77 
78 /* Number of keyfields stored in that vector.  */
79 int num_keyfields = 3;
80 
81 /* Vector of input file names, terminated with a null pointer. */
82 char **infiles;
83 
84 /* Vector of corresponding output file names, or NULL, meaning default it
85    (add an `s' to the end). */
86 char **outfiles;
87 
88 /* Length of `infiles'. */
89 int num_infiles;
90 
91 /* Pointer to the array of pointers to lines being sorted. */
92 char **linearray;
93 
94 /* The allocated length of `linearray'. */
95 long nlines;
96 
97 /* During in-core sort, this points to the base of the data block
98    which contains all the lines of data.  */
99 char *text_base;
100 
101 /* Initially 0; changed to 1 if we want initials in this index.  */
102 int need_initials;
103 
104 /* Remembers the first initial letter seen in this index, so we can
105    determine whether we need initials in the sorted form.  */
106 char first_initial;
107 
108 /* Forward declarations of functions in this file. */
109 void decode_command (int argc, char **argv);
110 void sort_in_core (char *infile, int total, char *outfile);
111 char **parsefile (char *filename, char **nextline, char *data, long int size);
112 char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr);
113 char *find_pos (char *str, int words, int chars, int ignore_blanks);
114 long find_value (char *start, long int length);
115 char *find_braced_pos (char *str, int words, int chars, int ignore_blanks);
116 char *find_braced_end (char *str);
117 void writelines (char **linearray, int nlines, FILE *ostream);
118 int compare_field (struct keyfield *keyfield, char *start1,
119                    long int length1, long int pos1, char *start2,
120                    long int length2, long int pos2);
121 int compare_full (const void *, const void *);
122 void pfatal_with_name (const char *name);
123 void fatal (const char *format, const char *arg);
124 void error (const char *format, const char *arg);
125 void *xmalloc (), *xrealloc ();
126 static char *concat3 (const char *, const char *, const char *);
127 
128 int
129 main (int argc, char **argv)
130 {
131   int i;
132 
133 #ifdef HAVE_SETLOCALE
134   /* Set locale via LC_ALL.  */
135   setlocale (LC_ALL, "");
136 #endif
137 
138   /* Set the text message domain.  */
139   bindtextdomain (PACKAGE, LOCALEDIR);
140   textdomain (PACKAGE);
141 
142   /* In case we write to a redirected stdout that fails.  */
143   /* not ready atexit (close_stdout); */
144 
145   /* Describe the kind of sorting to do. */
146   /* The first keyfield uses the first braced field and folds case. */
147   keyfields[0].braced = 1;
148   keyfields[0].fold_case = 1;
149   keyfields[0].endwords = -1;
150   keyfields[0].endchars = -1;
151 
152   /* The second keyfield uses the second braced field, numerically. */
153   keyfields[1].braced = 1;
154   keyfields[1].numeric = 1;
155   keyfields[1].startwords = 1;
156   keyfields[1].endwords = -1;
157   keyfields[1].endchars = -1;
158 
159   /* The third keyfield (which is ignored while discarding duplicates)
160      compares the whole line. */
161   keyfields[2].endwords = -1;
162   keyfields[2].endchars = -1;
163 
164   decode_command (argc, argv);
165 
166   /* Process input files completely, one by one.  */
167 
168   for (i = 0; i < num_infiles; i++)
169     {
170       int desc;
171       off_t ptr;
172       char *outfile;
173       struct stat instat;
174 
175       desc = open (infiles[i], O_RDONLY, 0);
176       if (desc < 0)
177         pfatal_with_name (infiles[i]);
178 
179       if (stat (infiles[i], &instat))
180         pfatal_with_name (infiles[i]);
181       if (S_ISDIR (instat.st_mode))
182         {
183 #ifdef EISDIR
184           errno = EISDIR;
185 #endif
186           pfatal_with_name (infiles[i]);
187         }
188 
189       lseek (desc, (off_t) 0, SEEK_END);
190       ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR);
191 
192       close (desc);
193 
194       outfile = outfiles[i];
195       if (!outfile)
196         outfile = concat3 (infiles[i], "s", "");
197 
198       need_initials = 0;
199       first_initial = '\0';
200 
201       if (ptr != (int)ptr)
202 	{
203 	  fprintf (stderr, "%s: %s: file too large\n", program_name,
204 		   infiles[i]);
205 	  xexit (1);
206 	}
207       sort_in_core (infiles[i], (int)ptr, outfile);
208     }
209 
210   xexit (0);
211   return 0; /* Avoid bogus warnings.  */
212 }
213 
214 typedef struct
215 {
216   char *long_name;
217   char *short_name;
218   int *variable_ref;
219   int variable_value;
220   char *arg_name;
221   char *doc_string;
222 } TEXINDEX_OPTION;
223 
224 TEXINDEX_OPTION texindex_options[] = {
225   { "--help", "-h", (int *)NULL, 0, (char *)NULL,
226       N_("display this help and exit") },
227   { "--output", "-o", (int *)NULL, 0, "FILE",
228       N_("send output to FILE") },
229   { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
230       N_("display version information and exit") },
231   { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
232 };
233 
234 void
235 usage (int result_value)
236 {
237   register int i;
238   FILE *f = result_value ? stderr : stdout;
239 
240   fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name);
241   fprintf (f, _("Generate a sorted index for each TeX output FILE.\n"));
242   /* Avoid trigraph nonsense.  */
243   fprintf (f,
244 _("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"),
245            '?', '?'); /* avoid trigraph in cat-id-tbl.c */
246   fprintf (f, _("\nOptions:\n"));
247 
248   for (i = 0; texindex_options[i].long_name; i++)
249     {
250       putc (' ', f);
251 
252       if (texindex_options[i].short_name)
253         fprintf (f, "%s, ", texindex_options[i].short_name);
254 
255       fprintf (f, "%s %s",
256                texindex_options[i].long_name,
257                texindex_options[i].arg_name
258                ? texindex_options[i].arg_name : "");
259 
260       fprintf (f, "\t%s\n", _(texindex_options[i].doc_string));
261     }
262   fputs (_("\n\
263 Email bug reports to bug-texinfo@gnu.org,\n\
264 general questions and discussion to help-texinfo@gnu.org.\n\
265 Texinfo home page: http://www.gnu.org/software/texinfo/"), f);
266   fputs ("\n", f);
267 
268   xexit (result_value);
269 }
270 
271 /* Decode the command line arguments to set the parameter variables
272    and set up the vector of keyfields and the vector of input files. */
273 
274 void
275 decode_command (int argc, char **argv)
276 {
277   int arg_index = 1;
278   char **ip;
279   char **op;
280 
281   /* Allocate ARGC input files, which must be enough.  */
282 
283   infiles = (char **) xmalloc (argc * sizeof (char *));
284   outfiles = (char **) xmalloc (argc * sizeof (char *));
285   ip = infiles;
286   op = outfiles;
287 
288   while (arg_index < argc)
289     {
290       char *arg = argv[arg_index++];
291 
292       if (*arg == '-')
293         {
294           if (strcmp (arg, "--version") == 0)
295             {
296               printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION);
297               puts ("");
298               puts ("Copyright (C) 2004 Free Software Foundation, Inc.");
299               printf (_("There is NO warranty.  You may redistribute this software\n\
300 under the terms of the GNU General Public License.\n\
301 For more information about these matters, see the files named COPYING.\n"));
302               xexit (0);
303             }
304           else if ((strcmp (arg, "--keep") == 0) ||
305                    (strcmp (arg, "-k") == 0))
306             {
307 	      /* Ignore, for backward compatibility */
308             }
309           else if ((strcmp (arg, "--help") == 0) ||
310                    (strcmp (arg, "-h") == 0))
311             {
312               usage (0);
313             }
314           else if ((strcmp (arg, "--output") == 0) ||
315                    (strcmp (arg, "-o") == 0))
316             {
317               if (argv[arg_index] != (char *)NULL)
318                 {
319                   arg_index++;
320                   if (op > outfiles)
321                     *(op - 1) = argv[arg_index];
322                 }
323               else
324                 usage (1);
325             }
326           else
327             usage (1);
328         }
329       else
330         {
331           *ip++ = arg;
332           *op++ = (char *)NULL;
333         }
334     }
335 
336   /* Record number of keyfields and terminate list of filenames. */
337   num_infiles = ip - infiles;
338   *ip = (char *)NULL;
339   if (num_infiles == 0)
340     usage (1);
341 }
342 
343 /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
344 
345 int
346 compare_full (const void *p1, const void *p2)
347 {
348   char **line1 = (char **) p1;
349   char **line2 = (char **) p2;
350   int i;
351 
352   /* Compare using the first keyfield;
353      if that does not distinguish the lines, try the second keyfield;
354      and so on. */
355 
356   for (i = 0; i < num_keyfields; i++)
357     {
358       long length1, length2;
359       char *start1 = find_field (&keyfields[i], *line1, &length1);
360       char *start2 = find_field (&keyfields[i], *line2, &length2);
361       int tem = compare_field (&keyfields[i], start1, length1,
362                                *line1 - text_base,
363                                start2, length2, *line2 - text_base);
364       if (tem)
365         {
366           if (keyfields[i].reverse)
367             return -tem;
368           return tem;
369         }
370     }
371 
372   return 0;                     /* Lines match exactly. */
373 }
374 
375 /* Compare LINE1 and LINE2, described by structures
376    in which the first keyfield is identified in advance.
377    For positional sorting, assumes that the order of the lines in core
378    reflects their nominal order.  */
379 int
380 compare_prepared (const void *p1, const void *p2)
381 {
382   struct lineinfo *line1 = (struct lineinfo *) p1;
383   struct lineinfo *line2 = (struct lineinfo *) p2;
384   int i;
385   int tem;
386   char *text1, *text2;
387 
388   /* Compare using the first keyfield, which has been found for us already. */
389   if (keyfields->positional)
390     {
391       if (line1->text - text_base > line2->text - text_base)
392         tem = 1;
393       else
394         tem = -1;
395     }
396   else if (keyfields->numeric)
397     tem = line1->key.number - line2->key.number;
398   else
399     tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
400                          line2->key.text, line2->keylen, 0);
401   if (tem)
402     {
403       if (keyfields->reverse)
404         return -tem;
405       return tem;
406     }
407 
408   text1 = line1->text;
409   text2 = line2->text;
410 
411   /* Compare using the second keyfield;
412      if that does not distinguish the lines, try the third keyfield;
413      and so on. */
414 
415   for (i = 1; i < num_keyfields; i++)
416     {
417       long length1, length2;
418       char *start1 = find_field (&keyfields[i], text1, &length1);
419       char *start2 = find_field (&keyfields[i], text2, &length2);
420       int tem = compare_field (&keyfields[i], start1, length1,
421                                text1 - text_base,
422                                start2, length2, text2 - text_base);
423       if (tem)
424         {
425           if (keyfields[i].reverse)
426             return -tem;
427           return tem;
428         }
429     }
430 
431   return 0;                     /* Lines match exactly. */
432 }
433 
434 /* Like compare_full but more general.
435    You can pass any strings, and you can say how many keyfields to use.
436    POS1 and POS2 should indicate the nominal positional ordering of
437    the two lines in the input.  */
438 
439 int
440 compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields)
441 {
442   int i;
443 
444   /* Compare using the first keyfield;
445      if that does not distinguish the lines, try the second keyfield;
446      and so on. */
447 
448   for (i = 0; i < use_keyfields; i++)
449     {
450       long length1, length2;
451       char *start1 = find_field (&keyfields[i], str1, &length1);
452       char *start2 = find_field (&keyfields[i], str2, &length2);
453       int tem = compare_field (&keyfields[i], start1, length1, pos1,
454                                start2, length2, pos2);
455       if (tem)
456         {
457           if (keyfields[i].reverse)
458             return -tem;
459           return tem;
460         }
461     }
462 
463   return 0;                     /* Lines match exactly. */
464 }
465 
466 /* Find the start and length of a field in STR according to KEYFIELD.
467    A pointer to the starting character is returned, and the length
468    is stored into the int that LENGTHPTR points to.  */
469 
470 char *
471 find_field (struct keyfield *keyfield, char *str, long int *lengthptr)
472 {
473   char *start;
474   char *end;
475   char *(*fun) ();
476 
477   if (keyfield->braced)
478     fun = find_braced_pos;
479   else
480     fun = find_pos;
481 
482   start = (*fun) (str, keyfield->startwords, keyfield->startchars,
483                   keyfield->ignore_blanks);
484   if (keyfield->endwords < 0)
485     {
486       if (keyfield->braced)
487         end = find_braced_end (start);
488       else
489         {
490           end = start;
491           while (*end && *end != '\n')
492             end++;
493         }
494     }
495   else
496     {
497       end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
498       if (end - str < start - str)
499         end = start;
500     }
501   *lengthptr = end - start;
502   return start;
503 }
504 
505 /* Return a pointer to a specified place within STR,
506    skipping (from the beginning) WORDS words and then CHARS chars.
507    If IGNORE_BLANKS is nonzero, we skip all blanks
508    after finding the specified word.  */
509 
510 char *
511 find_pos (char *str, int words, int chars, int ignore_blanks)
512 {
513   int i;
514   char *p = str;
515 
516   for (i = 0; i < words; i++)
517     {
518       char c;
519       /* Find next bunch of nonblanks and skip them. */
520       while ((c = *p) == ' ' || c == '\t')
521         p++;
522       while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
523         p++;
524       if (!*p || *p == '\n')
525         return p;
526     }
527 
528   while (*p == ' ' || *p == '\t')
529     p++;
530 
531   for (i = 0; i < chars; i++)
532     {
533       if (!*p || *p == '\n')
534         break;
535       p++;
536     }
537   return p;
538 }
539 
540 /* Like find_pos but assumes that each field is surrounded by braces
541    and that braces within fields are balanced. */
542 
543 char *
544 find_braced_pos (char *str, int words, int chars, int ignore_blanks)
545 {
546   int i;
547   int bracelevel;
548   char *p = str;
549   char c;
550 
551   for (i = 0; i < words; i++)
552     {
553       bracelevel = 1;
554       while ((c = *p++) != '{' && c != '\n' && c)
555         /* Do nothing. */ ;
556       if (c != '{')
557         return p - 1;
558       while (bracelevel)
559         {
560           c = *p++;
561           if (c == '{')
562             bracelevel++;
563           if (c == '}')
564             bracelevel--;
565           if (c == 0 || c == '\n')
566             return p - 1;
567         }
568     }
569 
570   while ((c = *p++) != '{' && c != '\n' && c)
571     /* Do nothing. */ ;
572 
573   if (c != '{')
574     return p - 1;
575 
576   if (ignore_blanks)
577     while ((c = *p) == ' ' || c == '\t')
578       p++;
579 
580   for (i = 0; i < chars; i++)
581     {
582       if (!*p || *p == '\n')
583         break;
584       p++;
585     }
586   return p;
587 }
588 
589 /* Find the end of the balanced-brace field which starts at STR.
590    The position returned is just before the closing brace. */
591 
592 char *
593 find_braced_end (char *str)
594 {
595   int bracelevel;
596   char *p = str;
597   char c;
598 
599   bracelevel = 1;
600   while (bracelevel)
601     {
602       c = *p++;
603       if (c == '{')
604         bracelevel++;
605       if (c == '}')
606         bracelevel--;
607       if (c == 0 || c == '\n')
608         return p - 1;
609     }
610   return p - 1;
611 }
612 
613 long
614 find_value (char *start, long int length)
615 {
616   while (length != 0L)
617     {
618       if (isdigit (*start))
619         return atol (start);
620       length--;
621       start++;
622     }
623   return 0l;
624 }
625 
626 /* Vector used to translate characters for comparison.
627    This is how we make all alphanumerics follow all else,
628    and ignore case in the first sorting.  */
629 int char_order[256];
630 
631 void
632 init_char_order (void)
633 {
634   int i;
635   for (i = 1; i < 256; i++)
636     char_order[i] = i;
637 
638   for (i = '0'; i <= '9'; i++)
639     char_order[i] += 512;
640 
641   for (i = 'a'; i <= 'z'; i++)
642     {
643       char_order[i] = 512 + i;
644       char_order[i + 'A' - 'a'] = 512 + i;
645     }
646 }
647 
648 /* Compare two fields (each specified as a start pointer and a character count)
649    according to KEYFIELD.
650    The sign of the value reports the relation between the fields. */
651 
652 int
653 compare_field (struct keyfield *keyfield, char *start1, long int length1,
654                long int pos1, char *start2, long int length2, long int pos2)
655 {
656   if (keyfields->positional)
657     {
658       if (pos1 > pos2)
659         return 1;
660       else
661         return -1;
662     }
663   if (keyfield->numeric)
664     {
665       long value = find_value (start1, length1) - find_value (start2, length2);
666       if (value > 0)
667         return 1;
668       if (value < 0)
669         return -1;
670       return 0;
671     }
672   else
673     {
674       char *p1 = start1;
675       char *p2 = start2;
676       char *e1 = start1 + length1;
677       char *e2 = start2 + length2;
678 
679       while (1)
680         {
681           int c1, c2;
682 
683           if (p1 == e1)
684             c1 = 0;
685           else
686             c1 = *p1++;
687           if (p2 == e2)
688             c2 = 0;
689           else
690             c2 = *p2++;
691 
692           if (char_order[c1] != char_order[c2])
693             return char_order[c1] - char_order[c2];
694           if (!c1)
695             break;
696         }
697 
698       /* Strings are equal except possibly for case.  */
699       p1 = start1;
700       p2 = start2;
701       while (1)
702         {
703           int c1, c2;
704 
705           if (p1 == e1)
706             c1 = 0;
707           else
708             c1 = *p1++;
709           if (p2 == e2)
710             c2 = 0;
711           else
712             c2 = *p2++;
713 
714           if (c1 != c2)
715             /* Reverse sign here so upper case comes out last.  */
716             return c2 - c1;
717           if (!c1)
718             break;
719         }
720 
721       return 0;
722     }
723 }
724 
725 /* Sort INFILE, whose size is TOTAL,
726    assuming that is small enough to be done in-core,
727    then indexify it and send the output to OUTFILE (or to stdout).  */
728 
729 void
730 sort_in_core (char *infile, int total, char *outfile)
731 {
732   char **nextline;
733   char *data = (char *) xmalloc (total + 1);
734   char *file_data;
735   long file_size;
736   int i;
737   FILE *ostream = stdout;
738   struct lineinfo *lineinfo;
739 
740   /* Read the contents of the file into the moby array `data'. */
741 
742   int desc = open (infile, O_RDONLY, 0);
743 
744   if (desc < 0)
745     fatal (_("failure reopening %s"), infile);
746   for (file_size = 0;;)
747     {
748       i = read (desc, data + file_size, total - file_size);
749       if (i <= 0)
750         break;
751       file_size += i;
752     }
753   file_data = data;
754   data[file_size] = 0;
755 
756   close (desc);
757 
758   if (file_size > 0 && data[0] != '\\' && data[0] != '@')
759     {
760       error (_("%s: not a texinfo index file"), infile);
761       return;
762     }
763 
764   init_char_order ();
765 
766   /* Sort routines want to know this address. */
767 
768   text_base = data;
769 
770   /* Create the array of pointers to lines, with a default size
771      frequently enough.  */
772 
773   nlines = total / 50;
774   if (!nlines)
775     nlines = 2;
776   linearray = (char **) xmalloc (nlines * sizeof (char *));
777 
778   /* `nextline' points to the next free slot in this array.
779      `nlines' is the allocated size.  */
780 
781   nextline = linearray;
782 
783   /* Parse the input file's data, and make entries for the lines.  */
784 
785   nextline = parsefile (infile, nextline, file_data, file_size);
786   if (nextline == 0)
787     {
788       error (_("%s: not a texinfo index file"), infile);
789       return;
790     }
791 
792   /* Sort the lines. */
793 
794   /* If we have enough space, find the first keyfield of each line in advance.
795      Make a `struct lineinfo' for each line, which records the keyfield
796      as well as the line, and sort them.  */
797 
798   lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo));
799 
800   if (lineinfo)
801     {
802       struct lineinfo *lp;
803       char **p;
804 
805       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
806         {
807           lp->text = *p;
808           lp->key.text = find_field (keyfields, *p, &lp->keylen);
809           if (keyfields->numeric)
810             lp->key.number = find_value (lp->key.text, lp->keylen);
811         }
812 
813       qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
814              compare_prepared);
815 
816       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
817         *p = lp->text;
818 
819       free (lineinfo);
820     }
821   else
822     qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
823 
824   /* Open the output file. */
825 
826   if (outfile)
827     {
828       ostream = fopen (outfile, "w");
829       if (!ostream)
830         pfatal_with_name (outfile);
831     }
832 
833   writelines (linearray, nextline - linearray, ostream);
834   if (outfile)
835     fclose (ostream);
836 
837   free (linearray);
838   free (data);
839 }
840 
841 /* Parse an input string in core into lines.
842    DATA is the input string, and SIZE is its length.
843    Data goes in LINEARRAY starting at NEXTLINE.
844    The value returned is the first entry in LINEARRAY still unused.
845    Value 0 means input file contents are invalid.  */
846 
847 char **
848 parsefile (char *filename, char **nextline, char *data, long int size)
849 {
850   char *p, *end;
851   char **line = nextline;
852 
853   p = data;
854   end = p + size;
855   *end = 0;
856 
857   while (p != end)
858     {
859       if (p[0] != '\\' && p[0] != '@')
860         return 0;
861 
862       *line = p;
863 
864       /* Find the first letter of the first field of this line.  If it
865          is different from the first letter of the first field of the
866          first line, we need initial headers in the output index.  */
867       while (*p && *p != '{')
868         p++;
869       if (p == end)
870         return 0;
871       p++;
872       if (first_initial)
873         {
874           if (first_initial != toupper (*p))
875             need_initials = 1;
876         }
877       else
878         first_initial = toupper (*p);
879 
880       while (*p && *p != '\n')
881         p++;
882       if (p != end)
883         p++;
884 
885       line++;
886       if (line == linearray + nlines)
887         {
888           char **old = linearray;
889           linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4));
890           line += linearray - old;
891         }
892     }
893 
894   return line;
895 }
896 
897 /* Indexification is a filter applied to the sorted lines
898    as they are being written to the output file.
899    Multiple entries for the same name, with different page numbers,
900    get combined into a single entry with multiple page numbers.
901    The first braced field, which is used for sorting, is discarded.
902    However, its first character is examined, folded to lower case,
903    and if it is different from that in the previous line fed to us
904    a \initial line is written with one argument, the new initial.
905 
906    If an entry has four braced fields, then the second and third
907    constitute primary and secondary names.
908    In this case, each change of primary name
909    generates a \primary line which contains only the primary name,
910    and in between these are \secondary lines which contain
911    just a secondary name and page numbers. */
912 
913 /* The last primary name we wrote a \primary entry for.
914    If only one level of indexing is being done, this is the last name seen. */
915 char *lastprimary;
916 /* Length of storage allocated for lastprimary. */
917 int lastprimarylength;
918 
919 /* Similar, for the secondary name. */
920 char *lastsecondary;
921 int lastsecondarylength;
922 
923 /* Zero if we are not in the middle of writing an entry.
924    One if we have written the beginning of an entry but have not
925    yet written any page numbers into it.
926    Greater than one if we have written the beginning of an entry
927    plus at least one page number. */
928 int pending;
929 
930 /* The initial (for sorting purposes) of the last primary entry written.
931    When this changes, a \initial {c} line is written */
932 
933 char *lastinitial;
934 
935 int lastinitiallength;
936 
937 /* When we need a string of length 1 for the value of lastinitial,
938    store it here.  */
939 
940 char lastinitial1[2];
941 
942 /* Initialize static storage for writing an index. */
943 
944 void
945 init_index (void)
946 {
947   pending = 0;
948   lastinitial = lastinitial1;
949   lastinitial1[0] = 0;
950   lastinitial1[1] = 0;
951   lastinitiallength = 0;
952   lastprimarylength = 100;
953   lastprimary = (char *) xmalloc (lastprimarylength + 1);
954   memset (lastprimary, '\0', lastprimarylength + 1);
955   lastsecondarylength = 100;
956   lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
957   memset (lastsecondary, '\0', lastsecondarylength + 1);
958 }
959 
960 /* Indexify.  Merge entries for the same name,
961    insert headers for each initial character, etc.  */
962 
963 void
964 indexify (char *line, FILE *ostream)
965 {
966   char *primary, *secondary, *pagenumber;
967   int primarylength, secondarylength = 0, pagelength;
968   int nosecondary;
969   int initiallength;
970   char *initial;
971   char initial1[2];
972   register char *p;
973 
974   /* First, analyze the parts of the entry fed to us this time. */
975 
976   p = find_braced_pos (line, 0, 0, 0);
977   if (*p == '{')
978     {
979       initial = p;
980       /* Get length of inner pair of braces starting at `p',
981          including that inner pair of braces.  */
982       initiallength = find_braced_end (p + 1) + 1 - p;
983     }
984   else
985     {
986       initial = initial1;
987       initial1[0] = toupper (*p);
988       initial1[1] = 0;
989       initiallength = 1;
990     }
991 
992   pagenumber = find_braced_pos (line, 1, 0, 0);
993   pagelength = find_braced_end (pagenumber) - pagenumber;
994   if (pagelength == 0)
995     fatal (_("No page number in %s"), line);
996 
997   primary = find_braced_pos (line, 2, 0, 0);
998   primarylength = find_braced_end (primary) - primary;
999 
1000   secondary = find_braced_pos (line, 3, 0, 0);
1001   nosecondary = !*secondary;
1002   if (!nosecondary)
1003     secondarylength = find_braced_end (secondary) - secondary;
1004 
1005   /* If the primary is different from before, make a new primary entry. */
1006   if (strncmp (primary, lastprimary, primarylength))
1007     {
1008       /* Close off current secondary entry first, if one is open. */
1009       if (pending)
1010         {
1011           fputs ("}\n", ostream);
1012           pending = 0;
1013         }
1014 
1015       /* If this primary has a different initial, include an entry for
1016          the initial. */
1017       if (need_initials &&
1018           (initiallength != lastinitiallength ||
1019            strncmp (initial, lastinitial, initiallength)))
1020         {
1021           fprintf (ostream, "\\initial {");
1022           fwrite (initial, 1, initiallength, ostream);
1023           fputs ("}\n", ostream);
1024           if (initial == initial1)
1025             {
1026               lastinitial = lastinitial1;
1027               *lastinitial1 = *initial1;
1028             }
1029           else
1030             {
1031               lastinitial = initial;
1032             }
1033           lastinitiallength = initiallength;
1034         }
1035 
1036       /* Make the entry for the primary.  */
1037       if (nosecondary)
1038         fputs ("\\entry {", ostream);
1039       else
1040         fputs ("\\primary {", ostream);
1041       fwrite (primary, primarylength, 1, ostream);
1042       if (nosecondary)
1043         {
1044           fputs ("}{", ostream);
1045           pending = 1;
1046         }
1047       else
1048         fputs ("}\n", ostream);
1049 
1050       /* Record name of most recent primary. */
1051       if (lastprimarylength < primarylength)
1052         {
1053           lastprimarylength = primarylength + 100;
1054           lastprimary = (char *) xrealloc (lastprimary,
1055                                            1 + lastprimarylength);
1056         }
1057       strncpy (lastprimary, primary, primarylength);
1058       lastprimary[primarylength] = 0;
1059 
1060       /* There is no current secondary within this primary, now. */
1061       lastsecondary[0] = 0;
1062     }
1063 
1064   /* Should not have an entry with no subtopic following one with a
1065      subtopic. */
1066 
1067   if (nosecondary && *lastsecondary)
1068     error (_("entry %s follows an entry with a secondary name"), line);
1069 
1070   /* Start a new secondary entry if necessary. */
1071   if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
1072     {
1073       if (pending)
1074         {
1075           fputs ("}\n", ostream);
1076           pending = 0;
1077         }
1078 
1079       /* Write the entry for the secondary.  */
1080       fputs ("\\secondary {", ostream);
1081       fwrite (secondary, secondarylength, 1, ostream);
1082       fputs ("}{", ostream);
1083       pending = 1;
1084 
1085       /* Record name of most recent secondary. */
1086       if (lastsecondarylength < secondarylength)
1087         {
1088           lastsecondarylength = secondarylength + 100;
1089           lastsecondary = (char *) xrealloc (lastsecondary,
1090                                              1 + lastsecondarylength);
1091         }
1092       strncpy (lastsecondary, secondary, secondarylength);
1093       lastsecondary[secondarylength] = 0;
1094     }
1095 
1096   /* Here to add one more page number to the current entry. */
1097   if (pending++ != 1)
1098     fputs (", ", ostream);  /* Punctuate first, if this is not the first. */
1099   fwrite (pagenumber, pagelength, 1, ostream);
1100 }
1101 
1102 /* Close out any unfinished output entry. */
1103 
1104 void
1105 finish_index (FILE *ostream)
1106 {
1107   if (pending)
1108     fputs ("}\n", ostream);
1109   free (lastprimary);
1110   free (lastsecondary);
1111 }
1112 
1113 /* Copy the lines in the sorted order.
1114    Each line is copied out of the input file it was found in. */
1115 
1116 void
1117 writelines (char **linearray, int nlines, FILE *ostream)
1118 {
1119   char **stop_line = linearray + nlines;
1120   char **next_line;
1121 
1122   init_index ();
1123 
1124   /* Output the text of the lines, and free the buffer space. */
1125 
1126   for (next_line = linearray; next_line != stop_line; next_line++)
1127     {
1128       /* Output the line only if distinct from previous one.  */
1129       if (next_line == linearray
1130       /* Compare previous line with this one, using only the
1131          explicitly specd keyfields. */
1132           || compare_general (*(next_line - 1), *next_line, 0L, 0L,
1133                               num_keyfields - 1))
1134         {
1135           char *p = *next_line;
1136           char c;
1137 
1138           while ((c = *p++) && c != '\n')
1139             /* Do nothing. */ ;
1140           *(p - 1) = 0;
1141           indexify (*next_line, ostream);
1142         }
1143     }
1144 
1145   finish_index (ostream);
1146 }
1147 
1148 /* Print error message and exit.  */
1149 
1150 void
1151 fatal (const char *format, const char *arg)
1152 {
1153   error (format, arg);
1154   xexit (1);
1155 }
1156 
1157 /* Print error message.  FORMAT is printf control string, ARG is arg for it. */
1158 void
1159 error (const char *format, const char *arg)
1160 {
1161   printf ("%s: ", program_name);
1162   printf (format, arg);
1163   if (format[strlen (format) -1] != '\n')
1164     printf ("\n");
1165 }
1166 
1167 void
1168 perror_with_name (const char *name)
1169 {
1170   fprintf (stderr, "%s: ", program_name);
1171   perror (name);
1172 }
1173 
1174 void
1175 pfatal_with_name (const char *name)
1176 {
1177   perror_with_name (name);
1178   xexit (1);
1179 }
1180 
1181 
1182 /* Return a newly-allocated string concatenating S1, S2, and S3.  */
1183 
1184 static char *
1185 concat3 (const char *s1, const char *s2, const char *s3)
1186 {
1187   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
1188   char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
1189 
1190   strcpy (result, s1);
1191   strcpy (result + len1, s2);
1192   strcpy (result + len1 + len2, s3);
1193   *(result + len1 + len2 + len3) = 0;
1194 
1195   return result;
1196 }
1197