xref: /netbsd-src/external/gpl2/texinfo/dist/util/texindex.c (revision 11b31797aa29d695c8b11c2294fbc1059b5dcddb)
1 /*	$NetBSD: texindex.c,v 1.3 2024/04/07 12:30:38 christos Exp $	*/
2 
3 /* texindex -- sort TeX index dribble output into an actual index.
4    Id: texindex.c,v 1.11 2004/04/11 17:56:47 karl Exp
5 
6    Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001,
7    2002, 2003, 2004 Free Software Foundation, Inc.
8 
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 2, or (at your option)
12    any later version.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
22 
23 #include "system.h"
24 #include <getopt.h>
25 
26 static char *program_name = "texindex";
27 
28 #if defined (emacs)
29 #  include "../src/config.h"
30 /* Some s/os.h files redefine these. */
31 #  undef read
32 #  undef close
33 #  undef write
34 #  undef open
35 #endif
36 
37 #if !defined (HAVE_MEMSET)
38 #undef memset
39 #define memset(ptr, ignore, count) bzero (ptr, count)
40 #endif
41 
42 #if !defined (SEEK_SET)
43 #  define SEEK_SET 0
44 #  define SEEK_CUR 1
45 #  define SEEK_END 2
46 #endif /* !SEEK_SET */
47 
48 /* When sorting in core, this structure describes one line
49    and the position and length of its first keyfield.  */
50 struct lineinfo
51 {
52   char *text;           /* The actual text of the line. */
53   union {
54     char *text;         /* The start of the key (for textual comparison). */
55     long number;        /* The numeric value (for numeric comparison). */
56   } key;
57   long keylen;          /* Length of KEY field. */
58   size_t idx;		/* tie breaker */
59 };
60 
61 /* This structure describes a field to use as a sort key. */
62 struct keyfield
63 {
64   int startwords;       /* Number of words to skip. */
65   int startchars;       /* Number of additional chars to skip. */
66   int endwords;         /* Number of words to ignore at end. */
67   int endchars;         /* Ditto for characters of last word. */
68   char ignore_blanks;   /* Non-zero means ignore spaces and tabs. */
69   char fold_case;       /* Non-zero means case doesn't matter. */
70   char reverse;         /* Non-zero means compare in reverse order. */
71   char numeric;         /* Non-zeros means field is ASCII numeric. */
72   char positional;      /* Sort according to file position. */
73   char braced;          /* Count balanced-braced groupings as fields. */
74 };
75 
76 /* Vector of keyfields to use. */
77 struct keyfield keyfields[3];
78 
79 /* Number of keyfields stored in that vector.  */
80 int num_keyfields = 3;
81 
82 /* Vector of input file names, terminated with a null pointer. */
83 char **infiles;
84 
85 /* Vector of corresponding output file names, or NULL, meaning default it
86    (add an `s' to the end). */
87 char **outfiles;
88 
89 /* Length of `infiles'. */
90 int num_infiles;
91 
92 /* Pointer to the array of pointers to lines being sorted. */
93 char **linearray;
94 
95 /* The allocated length of `linearray'. */
96 long nlines;
97 
98 /* During in-core sort, this points to the base of the data block
99    which contains all the lines of data.  */
100 char *text_base;
101 
102 /* Initially 0; changed to 1 if we want initials in this index.  */
103 int need_initials;
104 
105 /* Remembers the first initial letter seen in this index, so we can
106    determine whether we need initials in the sorted form.  */
107 char first_initial;
108 
109 /* Forward declarations of functions in this file. */
110 void decode_command (int argc, char **argv);
111 void sort_in_core (char *infile, int total, char *outfile);
112 char **parsefile (char *filename, char **nextline, char *data, long int size);
113 char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr);
114 char *find_pos (char *str, int words, int chars, int ignore_blanks);
115 long find_value (char *start, long int length);
116 char *find_braced_pos (char *str, int words, int chars, int ignore_blanks);
117 char *find_braced_end (char *str);
118 void writelines (char **linearray, int nlines, FILE *ostream);
119 int compare_field (struct keyfield *keyfield, char *start1,
120                    long int length1, long int pos1, char *start2,
121                    long int length2, long int pos2);
122 int compare_full (const void *, const void *);
123 void pfatal_with_name (const char *name);
124 void fatal (const char *format, const char *arg);
125 void error (const char *format, const char *arg);
126 void *xmalloc (), *xrealloc ();
127 static char *concat3 (const char *, const char *, const char *);
128 
129 int
main(int argc,char ** argv)130 main (int argc, char **argv)
131 {
132   int i;
133 
134 #ifdef HAVE_SETLOCALE
135   /* Set locale via LC_ALL.  */
136   setlocale (LC_ALL, "");
137 #endif
138 
139   /* Set the text message domain.  */
140   bindtextdomain (PACKAGE, LOCALEDIR);
141   textdomain (PACKAGE);
142 
143   /* In case we write to a redirected stdout that fails.  */
144   /* not ready atexit (close_stdout); */
145 
146   /* Describe the kind of sorting to do. */
147   /* The first keyfield uses the first braced field and folds case. */
148   keyfields[0].braced = 1;
149   keyfields[0].fold_case = 1;
150   keyfields[0].endwords = -1;
151   keyfields[0].endchars = -1;
152 
153   /* The second keyfield uses the second braced field, numerically. */
154   keyfields[1].braced = 1;
155   keyfields[1].numeric = 1;
156   keyfields[1].startwords = 1;
157   keyfields[1].endwords = -1;
158   keyfields[1].endchars = -1;
159 
160   /* The third keyfield (which is ignored while discarding duplicates)
161      compares the whole line. */
162   keyfields[2].endwords = -1;
163   keyfields[2].endchars = -1;
164 
165   decode_command (argc, argv);
166 
167   /* Process input files completely, one by one.  */
168 
169   for (i = 0; i < num_infiles; i++)
170     {
171       int desc;
172       off_t ptr;
173       char *outfile;
174       struct stat instat;
175 
176       desc = open (infiles[i], O_RDONLY, 0);
177       if (desc < 0)
178         pfatal_with_name (infiles[i]);
179 
180       if (stat (infiles[i], &instat))
181         pfatal_with_name (infiles[i]);
182       if (S_ISDIR (instat.st_mode))
183         {
184 #ifdef EISDIR
185           errno = EISDIR;
186 #endif
187           pfatal_with_name (infiles[i]);
188         }
189 
190       lseek (desc, (off_t) 0, SEEK_END);
191       ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR);
192 
193       close (desc);
194 
195       outfile = outfiles[i];
196       if (!outfile)
197         outfile = concat3 (infiles[i], "s", "");
198 
199       need_initials = 0;
200       first_initial = '\0';
201 
202       if (ptr != (int)ptr)
203 	{
204 	  fprintf (stderr, "%s: %s: file too large\n", program_name,
205 		   infiles[i]);
206 	  xexit (1);
207 	}
208       sort_in_core (infiles[i], (int)ptr, outfile);
209     }
210 
211   xexit (0);
212   return 0; /* Avoid bogus warnings.  */
213 }
214 
215 typedef struct
216 {
217   char *long_name;
218   char *short_name;
219   int *variable_ref;
220   int variable_value;
221   char *arg_name;
222   char *doc_string;
223 } TEXINDEX_OPTION;
224 
225 TEXINDEX_OPTION texindex_options[] = {
226   { "--help", "-h", (int *)NULL, 0, (char *)NULL,
227       N_("display this help and exit") },
228   { "--output", "-o", (int *)NULL, 0, "FILE",
229       N_("send output to FILE") },
230   { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
231       N_("display version information and exit") },
232   { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
233 };
234 
235 void
usage(int result_value)236 usage (int result_value)
237 {
238   register int i;
239   FILE *f = result_value ? stderr : stdout;
240 
241   fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name);
242   fprintf (f, _("Generate a sorted index for each TeX output FILE.\n"));
243   /* Avoid trigraph nonsense.  */
244   fprintf (f,
245 _("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"),
246            '?', '?'); /* avoid trigraph in cat-id-tbl.c */
247   fprintf (f, _("\nOptions:\n"));
248 
249   for (i = 0; texindex_options[i].long_name; i++)
250     {
251       putc (' ', f);
252 
253       if (texindex_options[i].short_name)
254         fprintf (f, "%s, ", texindex_options[i].short_name);
255 
256       fprintf (f, "%s %s",
257                texindex_options[i].long_name,
258                texindex_options[i].arg_name
259                ? texindex_options[i].arg_name : "");
260 
261       fprintf (f, "\t%s\n", _(texindex_options[i].doc_string));
262     }
263   fputs (_("\n\
264 Email bug reports to bug-texinfo@gnu.org,\n\
265 general questions and discussion to help-texinfo@gnu.org.\n\
266 Texinfo home page: http://www.gnu.org/software/texinfo/"), f);
267   fputs ("\n", f);
268 
269   xexit (result_value);
270 }
271 
272 /* Decode the command line arguments to set the parameter variables
273    and set up the vector of keyfields and the vector of input files. */
274 
275 void
decode_command(int argc,char ** argv)276 decode_command (int argc, char **argv)
277 {
278   int arg_index = 1;
279   char **ip;
280   char **op;
281 
282   /* Allocate ARGC input files, which must be enough.  */
283 
284   infiles = (char **) xmalloc (argc * sizeof (char *));
285   outfiles = (char **) xmalloc (argc * sizeof (char *));
286   ip = infiles;
287   op = outfiles;
288 
289   while (arg_index < argc)
290     {
291       char *arg = argv[arg_index++];
292 
293       if (*arg == '-')
294         {
295           if (strcmp (arg, "--version") == 0)
296             {
297               printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION);
298               puts ("");
299               puts ("Copyright (C) 2004 Free Software Foundation, Inc.");
300               printf (_("There is NO warranty.  You may redistribute this software\n\
301 under the terms of the GNU General Public License.\n\
302 For more information about these matters, see the files named COPYING.\n"));
303               xexit (0);
304             }
305           else if ((strcmp (arg, "--keep") == 0) ||
306                    (strcmp (arg, "-k") == 0))
307             {
308 	      /* Ignore, for backward compatibility */
309             }
310           else if ((strcmp (arg, "--help") == 0) ||
311                    (strcmp (arg, "-h") == 0))
312             {
313               usage (0);
314             }
315           else if ((strcmp (arg, "--output") == 0) ||
316                    (strcmp (arg, "-o") == 0))
317             {
318               if (argv[arg_index] != (char *)NULL)
319                 {
320                   arg_index++;
321                   if (op > outfiles)
322                     *(op - 1) = argv[arg_index];
323                 }
324               else
325                 usage (1);
326             }
327           else
328             usage (1);
329         }
330       else
331         {
332           *ip++ = arg;
333           *op++ = (char *)NULL;
334         }
335     }
336 
337   /* Record number of keyfields and terminate list of filenames. */
338   num_infiles = ip - infiles;
339   *ip = (char *)NULL;
340   if (num_infiles == 0)
341     usage (1);
342 }
343 
344 /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
345 
346 int
compare_full(const void * p1,const void * p2)347 compare_full (const void *p1, const void *p2)
348 {
349   char **line1 = (char **) p1;
350   char **line2 = (char **) p2;
351   int i;
352 
353   /* Compare using the first keyfield;
354      if that does not distinguish the lines, try the second keyfield;
355      and so on. */
356 
357   for (i = 0; i < num_keyfields; i++)
358     {
359       long length1, length2;
360       char *start1 = find_field (&keyfields[i], *line1, &length1);
361       char *start2 = find_field (&keyfields[i], *line2, &length2);
362       int tem = compare_field (&keyfields[i], start1, length1,
363                                *line1 - text_base,
364                                start2, length2, *line2 - text_base);
365       if (tem)
366         {
367           if (keyfields[i].reverse)
368             return -tem;
369           return tem;
370         }
371     }
372 
373   if (*line1 == *line2)
374     abort ();
375   return *line1 < *line2 ? -1 : 1;
376 }
377 
378 /* Compare LINE1 and LINE2, described by structures
379    in which the first keyfield is identified in advance.
380    For positional sorting, assumes that the order of the lines in core
381    reflects their nominal order.  */
382 int
compare_prepared(const void * p1,const void * p2)383 compare_prepared (const void *p1, const void *p2)
384 {
385   struct lineinfo *line1 = (struct lineinfo *) p1;
386   struct lineinfo *line2 = (struct lineinfo *) p2;
387   int i;
388   int tem;
389   char *text1, *text2;
390 
391   /* Compare using the first keyfield, which has been found for us already. */
392   if (keyfields->positional)
393     {
394       if (line1->text - text_base > line2->text - text_base)
395         tem = 1;
396       else
397         tem = -1;
398     }
399   else if (keyfields->numeric)
400     tem = line1->key.number - line2->key.number;
401   else
402     tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
403                          line2->key.text, line2->keylen, 0);
404   if (tem)
405     {
406       if (keyfields->reverse)
407         return -tem;
408       return tem;
409     }
410 
411   text1 = line1->text;
412   text2 = line2->text;
413 
414   /* Compare using the second keyfield;
415      if that does not distinguish the lines, try the third keyfield;
416      and so on. */
417 
418   for (i = 1; i < num_keyfields; i++)
419     {
420       long length1, length2;
421       char *start1 = find_field (&keyfields[i], text1, &length1);
422       char *start2 = find_field (&keyfields[i], text2, &length2);
423       int tem = compare_field (&keyfields[i], start1, length1,
424                                text1 - text_base,
425                                start2, length2, text2 - text_base);
426       if (tem)
427         {
428           if (keyfields[i].reverse)
429             return -tem;
430           return tem;
431         }
432     }
433 
434   if (line1->idx == line2->idx)
435     abort ();
436   return line1->idx < line2->idx ? -1 : 1;
437 }
438 
439 /* Like compare_full but more general.
440    You can pass any strings, and you can say how many keyfields to use.
441    POS1 and POS2 should indicate the nominal positional ordering of
442    the two lines in the input.  */
443 
444 int
compare_general(char * str1,char * str2,long int pos1,long int pos2,int use_keyfields)445 compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields)
446 {
447   int i;
448 
449   /* Compare using the first keyfield;
450      if that does not distinguish the lines, try the second keyfield;
451      and so on. */
452 
453   for (i = 0; i < use_keyfields; i++)
454     {
455       long length1, length2;
456       char *start1 = find_field (&keyfields[i], str1, &length1);
457       char *start2 = find_field (&keyfields[i], str2, &length2);
458       int tem = compare_field (&keyfields[i], start1, length1, pos1,
459                                start2, length2, pos2);
460       if (tem)
461         {
462           if (keyfields[i].reverse)
463             return -tem;
464           return tem;
465         }
466     }
467 
468   return 0;                     /* Lines match exactly. */
469 }
470 
471 /* Find the start and length of a field in STR according to KEYFIELD.
472    A pointer to the starting character is returned, and the length
473    is stored into the int that LENGTHPTR points to.  */
474 
475 char *
find_field(struct keyfield * keyfield,char * str,long int * lengthptr)476 find_field (struct keyfield *keyfield, char *str, long int *lengthptr)
477 {
478   char *start;
479   char *end;
480   char *(*fun) ();
481 
482   if (keyfield->braced)
483     fun = find_braced_pos;
484   else
485     fun = find_pos;
486 
487   start = (*fun) (str, keyfield->startwords, keyfield->startchars,
488                   keyfield->ignore_blanks);
489   if (keyfield->endwords < 0)
490     {
491       if (keyfield->braced)
492         end = find_braced_end (start);
493       else
494         {
495           end = start;
496           while (*end && *end != '\n')
497             end++;
498         }
499     }
500   else
501     {
502       end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
503       if (end - str < start - str)
504         end = start;
505     }
506   *lengthptr = end - start;
507   return start;
508 }
509 
510 /* Return a pointer to a specified place within STR,
511    skipping (from the beginning) WORDS words and then CHARS chars.
512    If IGNORE_BLANKS is nonzero, we skip all blanks
513    after finding the specified word.  */
514 
515 char *
find_pos(char * str,int words,int chars,int ignore_blanks)516 find_pos (char *str, int words, int chars, int ignore_blanks)
517 {
518   int i;
519   char *p = str;
520 
521   for (i = 0; i < words; i++)
522     {
523       char c;
524       /* Find next bunch of nonblanks and skip them. */
525       while ((c = *p) == ' ' || c == '\t')
526         p++;
527       while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
528         p++;
529       if (!*p || *p == '\n')
530         return p;
531     }
532 
533   while (*p == ' ' || *p == '\t')
534     p++;
535 
536   for (i = 0; i < chars; i++)
537     {
538       if (!*p || *p == '\n')
539         break;
540       p++;
541     }
542   return p;
543 }
544 
545 /* Like find_pos but assumes that each field is surrounded by braces
546    and that braces within fields are balanced. */
547 
548 char *
find_braced_pos(char * str,int words,int chars,int ignore_blanks)549 find_braced_pos (char *str, int words, int chars, int ignore_blanks)
550 {
551   int i;
552   int bracelevel;
553   char *p = str;
554   char c;
555 
556   for (i = 0; i < words; i++)
557     {
558       bracelevel = 1;
559       while ((c = *p++) != '{' && c != '\n' && c)
560         /* Do nothing. */ ;
561       if (c != '{')
562         return p - 1;
563       while (bracelevel)
564         {
565           c = *p++;
566           if (c == '{')
567             bracelevel++;
568           if (c == '}')
569             bracelevel--;
570           if (c == 0 || c == '\n')
571             return p - 1;
572         }
573     }
574 
575   while ((c = *p++) != '{' && c != '\n' && c)
576     /* Do nothing. */ ;
577 
578   if (c != '{')
579     return p - 1;
580 
581   if (ignore_blanks)
582     while ((c = *p) == ' ' || c == '\t')
583       p++;
584 
585   for (i = 0; i < chars; i++)
586     {
587       if (!*p || *p == '\n')
588         break;
589       p++;
590     }
591   return p;
592 }
593 
594 /* Find the end of the balanced-brace field which starts at STR.
595    The position returned is just before the closing brace. */
596 
597 char *
find_braced_end(char * str)598 find_braced_end (char *str)
599 {
600   int bracelevel;
601   char *p = str;
602   char c;
603 
604   bracelevel = 1;
605   while (bracelevel)
606     {
607       c = *p++;
608       if (c == '{')
609         bracelevel++;
610       if (c == '}')
611         bracelevel--;
612       if (c == 0 || c == '\n')
613         return p - 1;
614     }
615   return p - 1;
616 }
617 
618 long
find_value(char * start,long int length)619 find_value (char *start, long int length)
620 {
621   while (length != 0L)
622     {
623       if (isdigit (*start))
624         return atol (start);
625       length--;
626       start++;
627     }
628   return 0l;
629 }
630 
631 /* Vector used to translate characters for comparison.
632    This is how we make all alphanumerics follow all else,
633    and ignore case in the first sorting.  */
634 int char_order[256];
635 
636 void
init_char_order(void)637 init_char_order (void)
638 {
639   int i;
640   for (i = 1; i < 256; i++)
641     char_order[i] = i;
642 
643   for (i = '0'; i <= '9'; i++)
644     char_order[i] += 512;
645 
646   for (i = 'a'; i <= 'z'; i++)
647     {
648       char_order[i] = 512 + i;
649       char_order[i + 'A' - 'a'] = 512 + i;
650     }
651 }
652 
653 /* Compare two fields (each specified as a start pointer and a character count)
654    according to KEYFIELD.
655    The sign of the value reports the relation between the fields. */
656 
657 int
compare_field(struct keyfield * keyfield,char * start1,long int length1,long int pos1,char * start2,long int length2,long int pos2)658 compare_field (struct keyfield *keyfield, char *start1, long int length1,
659                long int pos1, char *start2, long int length2, long int pos2)
660 {
661   if (keyfields->positional)
662     {
663       if (pos1 > pos2)
664         return 1;
665       else
666         return -1;
667     }
668   if (keyfield->numeric)
669     {
670       long value = find_value (start1, length1) - find_value (start2, length2);
671       if (value > 0)
672         return 1;
673       if (value < 0)
674         return -1;
675       return 0;
676     }
677   else
678     {
679       char *p1 = start1;
680       char *p2 = start2;
681       char *e1 = start1 + length1;
682       char *e2 = start2 + length2;
683 
684       while (1)
685         {
686           int c1, c2;
687 
688           if (p1 == e1)
689             c1 = 0;
690           else
691             c1 = *p1++;
692           if (p2 == e2)
693             c2 = 0;
694           else
695             c2 = *p2++;
696 
697           if (char_order[c1] != char_order[c2])
698             return char_order[c1] - char_order[c2];
699           if (!c1)
700             break;
701         }
702 
703       /* Strings are equal except possibly for case.  */
704       p1 = start1;
705       p2 = start2;
706       while (1)
707         {
708           int c1, c2;
709 
710           if (p1 == e1)
711             c1 = 0;
712           else
713             c1 = *p1++;
714           if (p2 == e2)
715             c2 = 0;
716           else
717             c2 = *p2++;
718 
719           if (c1 != c2)
720             /* Reverse sign here so upper case comes out last.  */
721             return c2 - c1;
722           if (!c1)
723             break;
724         }
725 
726       return 0;
727     }
728 }
729 
730 /* Sort INFILE, whose size is TOTAL,
731    assuming that is small enough to be done in-core,
732    then indexify it and send the output to OUTFILE (or to stdout).  */
733 
734 void
sort_in_core(char * infile,int total,char * outfile)735 sort_in_core (char *infile, int total, char *outfile)
736 {
737   char **nextline;
738   char *data = (char *) xmalloc (total + 1);
739   char *file_data;
740   long file_size;
741   int i;
742   FILE *ostream = stdout;
743   struct lineinfo *lineinfo;
744 
745   /* Read the contents of the file into the moby array `data'. */
746 
747   int desc = open (infile, O_RDONLY, 0);
748 
749   if (desc < 0)
750     fatal (_("failure reopening %s"), infile);
751   for (file_size = 0;;)
752     {
753       i = read (desc, data + file_size, total - file_size);
754       if (i <= 0)
755         break;
756       file_size += i;
757     }
758   file_data = data;
759   data[file_size] = 0;
760 
761   close (desc);
762 
763   if (file_size > 0 && data[0] != '\\' && data[0] != '@')
764     {
765       error (_("%s: not a texinfo index file"), infile);
766       return;
767     }
768 
769   init_char_order ();
770 
771   /* Sort routines want to know this address. */
772 
773   text_base = data;
774 
775   /* Create the array of pointers to lines, with a default size
776      frequently enough.  */
777 
778   nlines = total / 50;
779   if (!nlines)
780     nlines = 2;
781   linearray = (char **) xmalloc (nlines * sizeof (char *));
782 
783   /* `nextline' points to the next free slot in this array.
784      `nlines' is the allocated size.  */
785 
786   nextline = linearray;
787 
788   /* Parse the input file's data, and make entries for the lines.  */
789 
790   nextline = parsefile (infile, nextline, file_data, file_size);
791   if (nextline == 0)
792     {
793       error (_("%s: not a texinfo index file"), infile);
794       return;
795     }
796 
797   /* Sort the lines. */
798 
799   /* If we have enough space, find the first keyfield of each line in advance.
800      Make a `struct lineinfo' for each line, which records the keyfield
801      as well as the line, and sort them.  */
802 
803   lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo));
804 
805   if (lineinfo)
806     {
807       size_t idx = 0;
808       struct lineinfo *lp;
809       char **p;
810 
811       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
812         {
813 	  lp->idx = idx++;
814           lp->text = *p;
815           lp->key.text = find_field (keyfields, *p, &lp->keylen);
816           if (keyfields->numeric)
817             lp->key.number = find_value (lp->key.text, lp->keylen);
818         }
819 
820       qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
821              compare_prepared);
822 
823       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
824         *p = lp->text;
825 
826       free (lineinfo);
827     }
828   else
829     qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
830 
831   /* Open the output file. */
832 
833   if (outfile)
834     {
835       ostream = fopen (outfile, "w");
836       if (!ostream)
837         pfatal_with_name (outfile);
838     }
839 
840   writelines (linearray, nextline - linearray, ostream);
841   if (outfile)
842     fclose (ostream);
843 
844   free (linearray);
845   free (data);
846 }
847 
848 /* Parse an input string in core into lines.
849    DATA is the input string, and SIZE is its length.
850    Data goes in LINEARRAY starting at NEXTLINE.
851    The value returned is the first entry in LINEARRAY still unused.
852    Value 0 means input file contents are invalid.  */
853 
854 char **
parsefile(char * filename,char ** nextline,char * data,long int size)855 parsefile (char *filename, char **nextline, char *data, long int size)
856 {
857   char *p, *end;
858   char **line = nextline;
859 
860   p = data;
861   end = p + size;
862   *end = 0;
863 
864   while (p != end)
865     {
866       if (p[0] != '\\' && p[0] != '@')
867         return 0;
868 
869       *line = p;
870 
871       /* Find the first letter of the first field of this line.  If it
872          is different from the first letter of the first field of the
873          first line, we need initial headers in the output index.  */
874       while (*p && *p != '{')
875         p++;
876       if (p == end)
877         return 0;
878       p++;
879       if (first_initial)
880         {
881           if (first_initial != toupper (*p))
882             need_initials = 1;
883         }
884       else
885         first_initial = toupper (*p);
886 
887       while (*p && *p != '\n')
888         p++;
889       if (p != end)
890         p++;
891 
892       line++;
893       if (line == linearray + nlines)
894         {
895           char **old = linearray;
896           linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4));
897           line += linearray - old;
898         }
899     }
900 
901   return line;
902 }
903 
904 /* Indexification is a filter applied to the sorted lines
905    as they are being written to the output file.
906    Multiple entries for the same name, with different page numbers,
907    get combined into a single entry with multiple page numbers.
908    The first braced field, which is used for sorting, is discarded.
909    However, its first character is examined, folded to lower case,
910    and if it is different from that in the previous line fed to us
911    a \initial line is written with one argument, the new initial.
912 
913    If an entry has four braced fields, then the second and third
914    constitute primary and secondary names.
915    In this case, each change of primary name
916    generates a \primary line which contains only the primary name,
917    and in between these are \secondary lines which contain
918    just a secondary name and page numbers. */
919 
920 /* The last primary name we wrote a \primary entry for.
921    If only one level of indexing is being done, this is the last name seen. */
922 char *lastprimary;
923 /* Length of storage allocated for lastprimary. */
924 int lastprimarylength;
925 
926 /* Similar, for the secondary name. */
927 char *lastsecondary;
928 int lastsecondarylength;
929 
930 /* Zero if we are not in the middle of writing an entry.
931    One if we have written the beginning of an entry but have not
932    yet written any page numbers into it.
933    Greater than one if we have written the beginning of an entry
934    plus at least one page number. */
935 int pending;
936 
937 /* The initial (for sorting purposes) of the last primary entry written.
938    When this changes, a \initial {c} line is written */
939 
940 char *lastinitial;
941 
942 int lastinitiallength;
943 
944 /* When we need a string of length 1 for the value of lastinitial,
945    store it here.  */
946 
947 char lastinitial1[2];
948 
949 /* Initialize static storage for writing an index. */
950 
951 void
init_index(void)952 init_index (void)
953 {
954   pending = 0;
955   lastinitial = lastinitial1;
956   lastinitial1[0] = 0;
957   lastinitial1[1] = 0;
958   lastinitiallength = 0;
959   lastprimarylength = 100;
960   lastprimary = (char *) xmalloc (lastprimarylength + 1);
961   memset (lastprimary, '\0', lastprimarylength + 1);
962   lastsecondarylength = 100;
963   lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
964   memset (lastsecondary, '\0', lastsecondarylength + 1);
965 }
966 
967 /* Indexify.  Merge entries for the same name,
968    insert headers for each initial character, etc.  */
969 
970 void
indexify(char * line,FILE * ostream)971 indexify (char *line, FILE *ostream)
972 {
973   char *primary, *secondary, *pagenumber;
974   int primarylength, secondarylength = 0, pagelength;
975   int nosecondary;
976   int initiallength;
977   char *initial;
978   char initial1[2];
979   register char *p;
980 
981   /* First, analyze the parts of the entry fed to us this time. */
982 
983   p = find_braced_pos (line, 0, 0, 0);
984   if (*p == '{')
985     {
986       initial = p;
987       /* Get length of inner pair of braces starting at `p',
988          including that inner pair of braces.  */
989       initiallength = find_braced_end (p + 1) + 1 - p;
990     }
991   else
992     {
993       initial = initial1;
994       initial1[0] = toupper (*p);
995       initial1[1] = 0;
996       initiallength = 1;
997     }
998 
999   pagenumber = find_braced_pos (line, 1, 0, 0);
1000   pagelength = find_braced_end (pagenumber) - pagenumber;
1001   if (pagelength == 0)
1002     fatal (_("No page number in %s"), line);
1003 
1004   primary = find_braced_pos (line, 2, 0, 0);
1005   primarylength = find_braced_end (primary) - primary;
1006 
1007   secondary = find_braced_pos (line, 3, 0, 0);
1008   nosecondary = !*secondary;
1009   if (!nosecondary)
1010     secondarylength = find_braced_end (secondary) - secondary;
1011 
1012   /* If the primary is different from before, make a new primary entry. */
1013   if (strncmp (primary, lastprimary, primarylength))
1014     {
1015       /* Close off current secondary entry first, if one is open. */
1016       if (pending)
1017         {
1018           fputs ("}\n", ostream);
1019           pending = 0;
1020         }
1021 
1022       /* If this primary has a different initial, include an entry for
1023          the initial. */
1024       if (need_initials &&
1025           (initiallength != lastinitiallength ||
1026            strncmp (initial, lastinitial, initiallength)))
1027         {
1028           fprintf (ostream, "\\initial {");
1029           fwrite (initial, 1, initiallength, ostream);
1030           fputs ("}\n", ostream);
1031           if (initial == initial1)
1032             {
1033               lastinitial = lastinitial1;
1034               *lastinitial1 = *initial1;
1035             }
1036           else
1037             {
1038               lastinitial = initial;
1039             }
1040           lastinitiallength = initiallength;
1041         }
1042 
1043       /* Make the entry for the primary.  */
1044       if (nosecondary)
1045         fputs ("\\entry {", ostream);
1046       else
1047         fputs ("\\primary {", ostream);
1048       fwrite (primary, primarylength, 1, ostream);
1049       if (nosecondary)
1050         {
1051           fputs ("}{", ostream);
1052           pending = 1;
1053         }
1054       else
1055         fputs ("}\n", ostream);
1056 
1057       /* Record name of most recent primary. */
1058       if (lastprimarylength < primarylength)
1059         {
1060           lastprimarylength = primarylength + 100;
1061           lastprimary = (char *) xrealloc (lastprimary,
1062                                            1 + lastprimarylength);
1063         }
1064       strncpy (lastprimary, primary, primarylength);
1065       lastprimary[primarylength] = 0;
1066 
1067       /* There is no current secondary within this primary, now. */
1068       lastsecondary[0] = 0;
1069     }
1070 
1071   /* Should not have an entry with no subtopic following one with a
1072      subtopic. */
1073 
1074   if (nosecondary && *lastsecondary)
1075     error (_("entry %s follows an entry with a secondary name"), line);
1076 
1077   /* Start a new secondary entry if necessary. */
1078   if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
1079     {
1080       if (pending)
1081         {
1082           fputs ("}\n", ostream);
1083           pending = 0;
1084         }
1085 
1086       /* Write the entry for the secondary.  */
1087       fputs ("\\secondary {", ostream);
1088       fwrite (secondary, secondarylength, 1, ostream);
1089       fputs ("}{", ostream);
1090       pending = 1;
1091 
1092       /* Record name of most recent secondary. */
1093       if (lastsecondarylength < secondarylength)
1094         {
1095           lastsecondarylength = secondarylength + 100;
1096           lastsecondary = (char *) xrealloc (lastsecondary,
1097                                              1 + lastsecondarylength);
1098         }
1099       strncpy (lastsecondary, secondary, secondarylength);
1100       lastsecondary[secondarylength] = 0;
1101     }
1102 
1103   /* Here to add one more page number to the current entry. */
1104   if (pending++ != 1)
1105     fputs (", ", ostream);  /* Punctuate first, if this is not the first. */
1106   fwrite (pagenumber, pagelength, 1, ostream);
1107 }
1108 
1109 /* Close out any unfinished output entry. */
1110 
1111 void
finish_index(FILE * ostream)1112 finish_index (FILE *ostream)
1113 {
1114   if (pending)
1115     fputs ("}\n", ostream);
1116   free (lastprimary);
1117   free (lastsecondary);
1118 }
1119 
1120 /* Copy the lines in the sorted order.
1121    Each line is copied out of the input file it was found in. */
1122 
1123 void
writelines(char ** linearray,int nlines,FILE * ostream)1124 writelines (char **linearray, int nlines, FILE *ostream)
1125 {
1126   char **stop_line = linearray + nlines;
1127   char **next_line;
1128 
1129   init_index ();
1130 
1131   /* Output the text of the lines, and free the buffer space. */
1132 
1133   for (next_line = linearray; next_line != stop_line; next_line++)
1134     {
1135       /* Output the line only if distinct from previous one.  */
1136       if (next_line == linearray
1137       /* Compare previous line with this one, using only the
1138          explicitly specd keyfields. */
1139           || compare_general (*(next_line - 1), *next_line, 0L, 0L,
1140                               num_keyfields - 1))
1141         {
1142           char *p = *next_line;
1143           char c;
1144 
1145           while ((c = *p++) && c != '\n')
1146             /* Do nothing. */ ;
1147           *(p - 1) = 0;
1148           indexify (*next_line, ostream);
1149         }
1150     }
1151 
1152   finish_index (ostream);
1153 }
1154 
1155 /* Print error message and exit.  */
1156 
1157 void
fatal(const char * format,const char * arg)1158 fatal (const char *format, const char *arg)
1159 {
1160   error (format, arg);
1161   xexit (1);
1162 }
1163 
1164 /* Print error message.  FORMAT is printf control string, ARG is arg for it. */
1165 void
error(const char * format,const char * arg)1166 error (const char *format, const char *arg)
1167 {
1168   printf ("%s: ", program_name);
1169   printf (format, arg);
1170   if (format[strlen (format) -1] != '\n')
1171     printf ("\n");
1172 }
1173 
1174 void
perror_with_name(const char * name)1175 perror_with_name (const char *name)
1176 {
1177   fprintf (stderr, "%s: ", program_name);
1178   perror (name);
1179 }
1180 
1181 void
pfatal_with_name(const char * name)1182 pfatal_with_name (const char *name)
1183 {
1184   perror_with_name (name);
1185   xexit (1);
1186 }
1187 
1188 
1189 /* Return a newly-allocated string concatenating S1, S2, and S3.  */
1190 
1191 static char *
concat3(const char * s1,const char * s2,const char * s3)1192 concat3 (const char *s1, const char *s2, const char *s3)
1193 {
1194   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
1195   char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
1196 
1197   strcpy (result, s1);
1198   strcpy (result + len1, s2);
1199   strcpy (result + len1 + len2, s3);
1200   *(result + len1 + len2 + len3) = 0;
1201 
1202   return result;
1203 }
1204