1 /* index.c -- indexing for Texinfo. 2 $Id: index.c,v 1.1.1.1 2000/02/09 01:25:16 espie Exp $ 3 4 Copyright (C) 1998, 99 Free Software Foundation, Inc. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software Foundation, 18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 19 20 #include "system.h" 21 #include "index.h" 22 #include "lang.h" 23 #include "macro.h" 24 #include "toc.h" 25 26 /* An index element... */ 27 typedef struct index_elt 28 { 29 struct index_elt *next; 30 char *entry; /* The index entry itself, after expansion. */ 31 char *entry_text; /* The original, non-expanded entry text. */ 32 char *node; /* The node from whence it came. */ 33 int code; /* Nonzero means add `@code{...}' when 34 printing this element. */ 35 int defining_line; /* Line number where this entry was written. */ 36 char *defining_file; /* Source file for defining_line. */ 37 } INDEX_ELT; 38 39 40 /* A list of short-names for each index. 41 There are two indices into the the_indices array. 42 * read_index is the index that points to the list of index 43 entries that we will find if we ask for the list of entries for 44 this name. 45 * write_index is the index that points to the list of index entries 46 that we will add new entries to. 47 48 Initially, read_index and write_index are the same, but the 49 @syncodeindex and @synindex commands can change the list we add 50 entries to. 51 52 For example, after the commands 53 @cindex foo 54 @defindex ii 55 @synindex cp ii 56 @cindex bar 57 58 the cp index will contain the entry `foo', and the new ii 59 index will contain the entry `bar'. This is consistent with the 60 way texinfo.tex handles the same situation. 61 62 In addition, for each index, it is remembered whether that index is 63 a code index or not. Code indices have @code{} inserted around the 64 first word when they are printed with printindex. */ 65 typedef struct 66 { 67 char *name; 68 int read_index; /* index entries for `name' */ 69 int write_index; /* store index entries here, @synindex can change it */ 70 int code; 71 } INDEX_ALIST; 72 73 INDEX_ALIST **name_index_alist = NULL; 74 75 /* An array of pointers. Each one is for a different index. The 76 "synindex" command changes which array slot is pointed to by a 77 given "index". */ 78 INDEX_ELT **the_indices = NULL; 79 80 /* The number of defined indices. */ 81 int defined_indices = 0; 82 83 /* Stuff for defining commands on the fly. */ 84 COMMAND **user_command_array = NULL; 85 int user_command_array_len = 0; 86 87 /* How to compare index entries for sorting. May be set to strcoll. */ 88 int (*index_compare_fn) () = strcasecmp; 89 90 /* Find which element in the known list of indices has this name. 91 Returns -1 if NAME isn't found. */ 92 static int 93 find_index_offset (name) 94 char *name; 95 { 96 int i; 97 for (i = 0; i < defined_indices; i++) 98 if (name_index_alist[i] && STREQ (name, name_index_alist[i]->name)) 99 return i; 100 return -1; 101 } 102 103 /* Return a pointer to the entry of (name . index) for this name. 104 Return NULL if the index doesn't exist. */ 105 INDEX_ALIST * 106 find_index (name) 107 char *name; 108 { 109 int offset = find_index_offset (name); 110 if (offset > -1) 111 return name_index_alist[offset]; 112 else 113 return NULL; 114 } 115 116 /* User-defined commands, which happens only from user-defined indexes. 117 Used to initialize the builtin indices, too. */ 118 void 119 define_user_command (name, proc, needs_braces_p) 120 char *name; 121 COMMAND_FUNCTION *proc; 122 int needs_braces_p; 123 { 124 int slot = user_command_array_len; 125 user_command_array_len++; 126 127 if (!user_command_array) 128 user_command_array = xmalloc (1 * sizeof (COMMAND *)); 129 130 user_command_array = xrealloc (user_command_array, 131 (1 + user_command_array_len) * sizeof (COMMAND *)); 132 133 user_command_array[slot] = xmalloc (sizeof (COMMAND)); 134 user_command_array[slot]->name = xstrdup (name); 135 user_command_array[slot]->proc = proc; 136 user_command_array[slot]->argument_in_braces = needs_braces_p; 137 } 138 139 /* Please release me, let me go... */ 140 static void 141 free_index (index) 142 INDEX_ELT *index; 143 { 144 INDEX_ELT *temp; 145 146 while ((temp = index)) 147 { 148 free (temp->entry); 149 free (temp->entry_text); 150 /* Do not free the node, because we already freed the tag table, 151 which freed all the node names. */ 152 /* free (temp->node); */ 153 index = index->next; 154 free (temp); 155 } 156 } 157 158 /* Flush an index by name. This will delete the list of entries that 159 would be written by a @printindex command for this index. */ 160 static void 161 undefindex (name) 162 char *name; 163 { 164 int i; 165 int which = find_index_offset (name); 166 167 /* The index might have already been freed if this was the target of 168 an @synindex. */ 169 if (which < 0 || !name_index_alist[which]) 170 return; 171 172 i = name_index_alist[which]->read_index; 173 174 free_index (the_indices[i]); 175 the_indices[i] = NULL; 176 177 free (name_index_alist[which]->name); 178 free (name_index_alist[which]); 179 name_index_alist[which] = NULL; 180 } 181 182 /* Add the arguments to the current index command to the index NAME. 183 html fixxme generate specific html anchor */ 184 static void 185 index_add_arg (name) 186 char *name; 187 { 188 int which; 189 char *index_entry; 190 INDEX_ALIST *tem; 191 192 tem = find_index (name); 193 194 which = tem ? tem->write_index : -1; 195 196 if (macro_expansion_output_stream && !executing_string) 197 append_to_expansion_output (input_text_offset + 1); 198 199 get_rest_of_line (0, &index_entry); 200 ignore_blank_line (); 201 202 if (macro_expansion_output_stream && !executing_string) 203 { 204 char *index_line = xmalloc (strlen (index_entry) + 2); 205 sprintf (index_line, "%s\n", index_entry); 206 me_execute_string_keep_state (index_line, NULL); 207 free (index_line); 208 } 209 210 if (which < 0) 211 { 212 line_error (_("Unknown index `%s'"), name); 213 free (index_entry); 214 } 215 else 216 { 217 INDEX_ELT *new = xmalloc (sizeof (INDEX_ELT)); 218 new->next = the_indices[which]; 219 new->entry_text = index_entry; 220 new->entry = NULL; 221 new->node = current_node ? current_node : xstrdup (""); 222 new->code = tem->code; 223 new->defining_line = line_number - 1; 224 /* We need to make a copy since input_filename may point to 225 something that goes away, for example, inside a macro. 226 (see the findexerr test). */ 227 new->defining_file = xstrdup (input_filename); 228 the_indices[which] = new; 229 } 230 } 231 232 /* The function which user defined index commands call. */ 233 static void 234 gen_index () 235 { 236 char *name = xstrdup (command); 237 if (strlen (name) >= strlen ("index")) 238 name[strlen (name) - strlen ("index")] = 0; 239 index_add_arg (name); 240 free (name); 241 } 242 243 /* Define an index known as NAME. We assign the slot number. 244 If CODE is nonzero, make this a code index. */ 245 static void 246 defindex (name, code) 247 char *name; 248 int code; 249 { 250 int i, slot; 251 252 /* If it already exists, flush it. */ 253 undefindex (name); 254 255 /* Try to find an empty slot. */ 256 slot = -1; 257 for (i = 0; i < defined_indices; i++) 258 if (!name_index_alist[i]) 259 { 260 slot = i; 261 break; 262 } 263 264 if (slot < 0) 265 { /* No such luck. Make space for another index. */ 266 slot = defined_indices; 267 defined_indices++; 268 269 name_index_alist = (INDEX_ALIST **) 270 xrealloc (name_index_alist, (1 + defined_indices) 271 * sizeof (INDEX_ALIST *)); 272 the_indices = (INDEX_ELT **) 273 xrealloc (the_indices, (1 + defined_indices) * sizeof (INDEX_ELT *)); 274 } 275 276 /* We have a slot. Start assigning. */ 277 name_index_alist[slot] = xmalloc (sizeof (INDEX_ALIST)); 278 name_index_alist[slot]->name = xstrdup (name); 279 name_index_alist[slot]->read_index = slot; 280 name_index_alist[slot]->write_index = slot; 281 name_index_alist[slot]->code = code; 282 283 the_indices[slot] = NULL; 284 } 285 286 /* Define an index NAME, implicitly @code if CODE is nonzero. */ 287 static void 288 top_defindex (name, code) 289 char *name; 290 int code; 291 { 292 char *temp; 293 294 temp = xmalloc (1 + strlen (name) + strlen ("index")); 295 sprintf (temp, "%sindex", name); 296 define_user_command (temp, gen_index, 0); 297 defindex (name, code); 298 free (temp); 299 } 300 301 /* Set up predefined indices. */ 302 void 303 init_indices () 304 { 305 int i; 306 307 /* Create the default data structures. */ 308 309 /* Initialize data space. */ 310 if (!the_indices) 311 { 312 the_indices = xmalloc ((1 + defined_indices) * sizeof (INDEX_ELT *)); 313 the_indices[defined_indices] = NULL; 314 315 name_index_alist = xmalloc ((1 + defined_indices) 316 * sizeof (INDEX_ALIST *)); 317 name_index_alist[defined_indices] = NULL; 318 } 319 320 /* If there were existing indices, get rid of them now. */ 321 for (i = 0; i < defined_indices; i++) 322 { 323 undefindex (name_index_alist[i]->name); 324 if (name_index_alist[i]) 325 { /* Suppose we're called with two input files, and the first 326 does a @synindex pg cp. Then, when we get here to start 327 the second file, the "pg" element won't get freed by 328 undefindex (because it's pointing to "cp"). So free it 329 here; otherwise, when we try to define the pg index again 330 just below, it will still point to cp. */ 331 free (name_index_alist[i]->name); 332 free (name_index_alist[i]); 333 name_index_alist[i] = NULL; 334 } 335 } 336 337 /* Add the default indices. */ 338 top_defindex ("cp", 0); /* cp is the only non-code index. */ 339 top_defindex ("fn", 1); 340 top_defindex ("ky", 1); 341 top_defindex ("pg", 1); 342 top_defindex ("tp", 1); 343 top_defindex ("vr", 1); 344 } 345 346 /* Given an index name, return the offset in the_indices of this index, 347 or -1 if there is no such index. */ 348 int 349 translate_index (name) 350 char *name; 351 { 352 INDEX_ALIST *which = find_index (name); 353 354 if (which) 355 return which->read_index; 356 else 357 return -1; 358 } 359 360 /* Return the index list which belongs to NAME. */ 361 INDEX_ELT * 362 index_list (name) 363 char *name; 364 { 365 int which = translate_index (name); 366 if (which < 0) 367 return (INDEX_ELT *) -1; 368 else 369 return the_indices[which]; 370 } 371 372 /* Define a new index command. Arg is name of index. */ 373 static void 374 gen_defindex (code) 375 int code; 376 { 377 char *name; 378 get_rest_of_line (0, &name); 379 380 if (find_index (name)) 381 { 382 line_error (_("Index `%s' already exists"), name); 383 } 384 else 385 { 386 char *temp = xmalloc (strlen (name) + sizeof ("index")); 387 sprintf (temp, "%sindex", name); 388 define_user_command (temp, gen_index, 0); 389 defindex (name, code); 390 free (temp); 391 } 392 393 free (name); 394 } 395 396 void 397 cm_defindex () 398 { 399 gen_defindex (0); 400 } 401 402 void 403 cm_defcodeindex () 404 { 405 gen_defindex (1); 406 } 407 408 /* Expects 2 args, on the same line. Both are index abbreviations. 409 Make the first one be a synonym for the second one, i.e. make the 410 first one have the same index as the second one. */ 411 void 412 cm_synindex () 413 { 414 int source, target; 415 char *abbrev1, *abbrev2; 416 417 skip_whitespace (); 418 get_until_in_line (0, " ", &abbrev1); 419 target = find_index_offset (abbrev1); 420 skip_whitespace (); 421 get_until_in_line (0, " ", &abbrev2); 422 source = find_index_offset (abbrev2); 423 if (source < 0 || target < 0) 424 { 425 line_error (_("Unknown index `%s' and/or `%s' in @synindex"), 426 abbrev1, abbrev2); 427 } 428 else 429 { 430 name_index_alist[target]->write_index 431 = name_index_alist[source]->write_index; 432 } 433 434 free (abbrev1); 435 free (abbrev2); 436 } 437 438 void 439 cm_pindex () /* Pinhead index. */ 440 { 441 index_add_arg ("pg"); 442 } 443 444 void 445 cm_vindex () /* Variable index. */ 446 { 447 index_add_arg ("vr"); 448 } 449 450 void 451 cm_kindex () /* Key index. */ 452 { 453 index_add_arg ("ky"); 454 } 455 456 void 457 cm_cindex () /* Concept index. */ 458 { 459 index_add_arg ("cp"); 460 } 461 462 void 463 cm_findex () /* Function index. */ 464 { 465 index_add_arg ("fn"); 466 } 467 468 void 469 cm_tindex () /* Data Type index. */ 470 { 471 index_add_arg ("tp"); 472 } 473 474 int 475 index_element_compare (element1, element2) 476 INDEX_ELT **element1, **element2; 477 { 478 return index_compare_fn ((*element1)->entry, (*element2)->entry); 479 } 480 481 /* Force all index entries to be unique. */ 482 void 483 make_index_entries_unique (array, count) 484 INDEX_ELT **array; 485 int count; 486 { 487 int i, j; 488 INDEX_ELT **copy; 489 int counter = 1; 490 491 copy = xmalloc ((1 + count) * sizeof (INDEX_ELT *)); 492 493 for (i = 0, j = 0; i < count; i++) 494 { 495 if (i == (count - 1) 496 || array[i]->node != array[i + 1]->node 497 || !STREQ (array[i]->entry, array[i + 1]->entry)) 498 copy[j++] = array[i]; 499 else 500 { 501 free (array[i]->entry); 502 free (array[i]->entry_text); 503 free (array[i]); 504 } 505 } 506 copy[j] = NULL; 507 508 /* Now COPY contains only unique entries. Duplicated entries in the 509 original array have been freed. Replace the current array with 510 the copy, fixing the NEXT pointers. */ 511 for (i = 0; copy[i]; i++) 512 { 513 copy[i]->next = copy[i + 1]; 514 515 /* Fix entry names which are the same. They point to different nodes, 516 so we make the entry name unique. */ 517 if (copy[i+1] 518 && STREQ (copy[i]->entry, copy[i + 1]->entry) 519 && !html) 520 { 521 char *new_entry_name; 522 523 new_entry_name = xmalloc (10 + strlen (copy[i]->entry)); 524 sprintf (new_entry_name, "%s <%d>", copy[i]->entry, counter); 525 free (copy[i]->entry); 526 copy[i]->entry = new_entry_name; 527 counter++; 528 } 529 else 530 counter = 1; 531 532 array[i] = copy[i]; 533 } 534 array[i] = NULL; 535 536 /* Free the storage used only by COPY. */ 537 free (copy); 538 } 539 540 /* Sort the index passed in INDEX, returning an array of 541 pointers to elements. The array is terminated with a NULL 542 pointer. We call qsort because it's supposed to be fast. 543 I think this looks bad. */ 544 INDEX_ELT ** 545 sort_index (index) 546 INDEX_ELT *index; 547 { 548 INDEX_ELT **array; 549 INDEX_ELT *temp = index; 550 int count = 0; 551 int save_line_number = line_number; 552 char *save_input_filename = input_filename; 553 int save_html = html; 554 555 /* Pretend we are in non-HTML mode, for the purpose of getting the 556 expanded index entry that lacks any markup and other HTML escape 557 characters which could produce a wrong sort order. */ 558 /* fixme: html: this still causes some markup, such as non-ASCII 559 characters @AE{} etc., to sort incorrectly. */ 560 html = 0; 561 562 while (temp) 563 { 564 count++; 565 temp = temp->next; 566 } 567 568 /* We have the length. Make an array. */ 569 570 array = xmalloc ((count + 1) * sizeof (INDEX_ELT *)); 571 count = 0; 572 temp = index; 573 574 while (temp) 575 { 576 array[count++] = temp; 577 578 /* Set line number and input filename to the source line for this 579 index entry, as this expansion finds any errors. */ 580 line_number = array[count - 1]->defining_line; 581 input_filename = array[count - 1]->defining_file; 582 583 /* If this particular entry should be printed as a "code" index, 584 then expand it as @code{entry}, i.e. as in fixed-width font. */ 585 array[count-1]->entry = expansion (temp->entry_text, 586 array[count-1]->code); 587 588 temp = temp->next; 589 } 590 array[count] = NULL; /* terminate the array. */ 591 line_number = save_line_number; 592 input_filename = save_input_filename; 593 html = save_html; 594 595 #ifdef HAVE_STRCOLL 596 /* This is not perfect. We should set (then restore) the locale to the 597 documentlanguage, so strcoll operates according to the document's 598 locale, not the user's. For now, I'm just going to assume that 599 those few new documents which use @documentlanguage will be 600 processed in the appropriate locale. In any case, don't use 601 strcoll in the C (aka POSIX) locale, that is the ASCII ordering. */ 602 if (language_code != en) 603 { 604 char *lang_env = getenv ("LANG"); 605 if (lang_env && !STREQ (lang_env, "C") && !STREQ (lang_env, "POSIX")) 606 index_compare_fn = strcoll; 607 } 608 #endif /* HAVE_STRCOLL */ 609 610 /* Sort the array. */ 611 qsort (array, count, sizeof (INDEX_ELT *), index_element_compare); 612 make_index_entries_unique (array, count); 613 return array; 614 } 615 616 /* Nonzero means that we are in the middle of printing an index. */ 617 int printing_index = 0; 618 619 /* Takes one arg, a short name of an index to print. 620 Outputs a menu of the sorted elements of the index. */ 621 void 622 cm_printindex () 623 { 624 int item; 625 INDEX_ELT *index; 626 INDEX_ELT *last_index = 0; 627 INDEX_ELT **array; 628 char *index_name; 629 unsigned line_length; 630 char *line; 631 int saved_inhibit_paragraph_indentation = inhibit_paragraph_indentation; 632 int saved_filling_enabled = filling_enabled; 633 int saved_line_number = line_number; 634 char *saved_input_filename = input_filename; 635 636 close_paragraph (); 637 get_rest_of_line (0, &index_name); 638 639 index = index_list (index_name); 640 if (index == (INDEX_ELT *)-1) 641 { 642 line_error (_("Unknown index `%s' in @printindex"), index_name); 643 free (index_name); 644 return; 645 } 646 647 /* Do this before sorting, so execute_string in index_element_compare 648 will give the same results as when we actually print. */ 649 printing_index = 1; 650 filling_enabled = 0; 651 inhibit_paragraph_indentation = 1; 652 array = sort_index (index); 653 654 close_paragraph (); 655 if (html) 656 add_word ("<ul compact>"); 657 else if (!no_headers) 658 add_word ("* Menu:\n\n"); 659 660 me_inhibit_expansion++; 661 662 /* This will probably be enough. */ 663 line_length = 100; 664 line = xmalloc (line_length); 665 666 for (item = 0; (index = array[item]); item++) 667 { 668 /* A pathological document might have an index entry outside of any 669 node. Don't crash; try using the section name instead. */ 670 char *index_node = index->node; 671 672 line_number = index->defining_line; 673 input_filename = index->defining_file; 674 675 if ((!index_node || !*index_node) && html) 676 index_node = toc_find_section_of_node (index_node); 677 678 if (!index_node || !*index_node) 679 { 680 line_error (_("Entry for index `%s' outside of any node"), 681 index_name); 682 if (html || !no_headers) 683 index_node = _("(outside of any node)"); 684 } 685 686 if (html) 687 /* fixme: html: we should use specific index anchors pointing 688 to the actual location of the indexed position (but then we 689 have to find something to wrap the anchor around). */ 690 { 691 if (last_index 692 && STREQ (last_index->entry_text, index->entry_text)) 693 add_word (", "); /* Don't repeat the previous entry. */ 694 else 695 { 696 /* In the HTML case, the expanded index entry is not 697 good for us, since it was expanded for non-HTML mode 698 inside sort_index. So we need to HTML-escape and 699 expand the original entry text here. */ 700 char *escaped_entry = xstrdup (index->entry_text); 701 char *expanded_entry; 702 703 /* expansion() doesn't HTML-escape the argument, so need 704 to do it separately. */ 705 escaped_entry = escape_string (escaped_entry); 706 expanded_entry = expansion (escaped_entry, index->code); 707 add_word_args ("\n<li>%s: ", expanded_entry); 708 free (escaped_entry); 709 free (expanded_entry); 710 } 711 add_word ("<a href=\""); 712 if (index->node && *index->node) 713 { 714 /* Make sure any non-macros in the node name are expanded. */ 715 in_fixed_width_font++; 716 index_node = expansion (index_node, 0); 717 in_fixed_width_font--; 718 add_anchor_name (index_node, 1); 719 add_word_args ("\">%s</a>", index_node); 720 free (index_node); 721 } 722 else if (STREQ (index_node, _("(outside of any node)"))) 723 { 724 add_anchor_name (index_node, 1); 725 add_word_args ("\">%s</a>", index_node); 726 } 727 else 728 /* If we use the section instead of the (missing) node, then 729 index_node already includes all we need except the #. */ 730 add_word_args ("#%s</a>", index_node); 731 } 732 else 733 { 734 unsigned new_length = strlen (index->entry); 735 736 if (new_length < 50) /* minimum length used below */ 737 new_length = 50; 738 new_length += strlen (index_node) + 7; /* * : .\n\0 */ 739 740 if (new_length > line_length) 741 { 742 line_length = new_length; 743 line = xrealloc (line, line_length); 744 } 745 /* Print the entry, nicely formatted. We've already 746 expanded any commands in index->entry, including any 747 implicit @code. Thus, can't call execute_string, since 748 @@ has turned into @. */ 749 if (!no_headers) 750 { 751 sprintf (line, "* %-37s ", index->entry); 752 line[2 + strlen (index->entry)] = ':'; 753 insert_string (line); 754 /* Make sure any non-macros in the node name are expanded. */ 755 in_fixed_width_font++; 756 execute_string ("%s.\n", index_node); 757 in_fixed_width_font--; 758 } 759 else 760 { 761 /* With --no-headers, the @node lines are gone, so 762 there's little sense in referring to them in the 763 index. Instead, output the number or name of the 764 section that corresponds to that node. */ 765 char *section_name = toc_find_section_of_node (index_node); 766 767 sprintf (line, "%-*s ", number_sections ? 50 : 1, index->entry); 768 line[strlen (index->entry)] = ':'; 769 insert_string (line); 770 if (section_name) 771 { 772 int idx = 0; 773 unsigned ref_len = strlen (section_name) + 30; 774 775 if (ref_len > line_length) 776 { 777 line_length = ref_len; 778 line = xrealloc (line, line_length); 779 } 780 781 if (number_sections) 782 { 783 while (section_name[idx] 784 && (isdigit (section_name[idx]) 785 || (idx && section_name[idx] == '.'))) 786 idx++; 787 } 788 if (idx) 789 sprintf (line, " See %.*s.\n", idx, section_name); 790 else 791 sprintf (line, "\n See ``%s''.\n", section_name); 792 insert_string (line); 793 } 794 else 795 { 796 insert_string (" "); /* force a blank */ 797 execute_string ("See node %s.\n", index_node); 798 } 799 } 800 } 801 802 /* Prevent `output_paragraph' from growing to the size of the 803 whole index. */ 804 flush_output (); 805 last_index = index; 806 } 807 808 free (line); 809 free (index_name); 810 811 me_inhibit_expansion--; 812 813 printing_index = 0; 814 free (array); 815 close_single_paragraph (); 816 filling_enabled = saved_filling_enabled; 817 inhibit_paragraph_indentation = saved_inhibit_paragraph_indentation; 818 input_filename = saved_input_filename; 819 line_number = saved_line_number; 820 821 if (html) 822 add_word ("</ul>"); 823 } 824