/*	$NetBSD: ref.cpp,v 1.1.1.1 2016/01/13 18:41:49 christos Exp $	*/

// -*- C++ -*-
/* Copyright (C) 1989, 1990, 1991, 1992, 2001, 2003
   Free Software Foundation, Inc.
     Written by James Clark (jjc@jclark.com)

This file is part of groff.

groff is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

groff is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License along
with groff; see the file COPYING.  If not, write to the Free Software
Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */

#include "refer.h"
#include "refid.h"
#include "ref.h"
#include "token.h"

// Forward declarations of file-local helpers that are defined further
// down in this file but used before their definitions.
static const char *find_day(const char *, const char *, const char **);
static int find_month(const char *start, const char *end);
static void abbreviate_names(string &);

// Default list of leading articles, stored as consecutive
// NUL-terminated words ("the", "a", "an").
#define DEFAULT_ARTICLES "the\000a\000an"

// sizeof (rather than strlen) is used so that the length covers the
// embedded NULs and the literal's terminating NUL; sortify_title() walks
// this buffer word by word with strchr(a, '\0') + 1.
string articles(DEFAULT_ARTICLES, sizeof(DEFAULT_ARTICLES));

// Multiple occurrences of fields are separated by FIELD_SEPARATOR.
38 const char FIELD_SEPARATOR = '\0'; 39 40 const char MULTI_FIELD_NAMES[] = "AE"; 41 const char *AUTHOR_FIELDS = "AQ"; 42 43 enum { OTHER, JOURNAL_ARTICLE, BOOK, ARTICLE_IN_BOOK, TECH_REPORT, BELL_TM }; 44 45 const char *reference_types[] = { 46 "other", 47 "journal-article", 48 "book", 49 "article-in-book", 50 "tech-report", 51 "bell-tm", 52 }; 53 54 static string temp_fields[256]; 55 56 reference::reference(const char *start, int len, reference_id *ridp) 57 : h(0), merged(0), no(-1), field(0), nfields(0), label_ptr(0), 58 computed_authors(0), last_needed_author(-1), nauthors(-1) 59 { 60 int i; 61 for (i = 0; i < 256; i++) 62 field_index[i] = NULL_FIELD_INDEX; 63 if (ridp) 64 rid = *ridp; 65 if (start == 0) 66 return; 67 if (len <= 0) 68 return; 69 const char *end = start + len; 70 const char *ptr = start; 71 assert(*ptr == '%'); 72 while (ptr < end) { 73 if (ptr + 1 < end && ptr[1] != '\0' 74 && ((ptr[1] != '%' && ptr[1] == annotation_field) 75 || (ptr + 2 < end && ptr[1] == '%' && ptr[2] != '\0' 76 && discard_fields.search(ptr[2]) < 0))) { 77 if (ptr[1] == '%') 78 ptr++; 79 string &f = temp_fields[(unsigned char)ptr[1]]; 80 ptr += 2; 81 while (ptr < end && csspace(*ptr)) 82 ptr++; 83 for (;;) { 84 for (;;) { 85 if (ptr >= end) { 86 f += '\n'; 87 break; 88 } 89 f += *ptr; 90 if (*ptr++ == '\n') 91 break; 92 } 93 if (ptr >= end || *ptr == '%') 94 break; 95 } 96 } 97 else if (ptr + 1 < end && ptr[1] != '\0' && ptr[1] != '%' 98 && discard_fields.search(ptr[1]) < 0) { 99 string &f = temp_fields[(unsigned char)ptr[1]]; 100 if (f.length() > 0) { 101 if (strchr(MULTI_FIELD_NAMES, ptr[1]) != 0) 102 f += FIELD_SEPARATOR; 103 else 104 f.clear(); 105 } 106 ptr += 2; 107 if (ptr < end) { 108 if (*ptr == ' ') 109 ptr++; 110 for (;;) { 111 const char *p = ptr; 112 while (ptr < end && *ptr != '\n') 113 ptr++; 114 // strip trailing white space 115 const char *q = ptr; 116 while (q > p && q[-1] != '\n' && csspace(q[-1])) 117 q--; 118 while (p < q) 119 f += *p++; 120 if (ptr >= 
end) 121 break; 122 ptr++; 123 if (ptr >= end) 124 break; 125 if (*ptr == '%') 126 break; 127 f += ' '; 128 } 129 } 130 } 131 else { 132 // skip this field 133 for (;;) { 134 while (ptr < end && *ptr++ != '\n') 135 ; 136 if (ptr >= end || *ptr == '%') 137 break; 138 } 139 } 140 } 141 for (i = 0; i < 256; i++) 142 if (temp_fields[i].length() > 0) 143 nfields++; 144 field = new string[nfields]; 145 int j = 0; 146 for (i = 0; i < 256; i++) 147 if (temp_fields[i].length() > 0) { 148 field[j].move(temp_fields[i]); 149 if (abbreviate_fields.search(i) >= 0) 150 abbreviate_names(field[j]); 151 field_index[i] = j; 152 j++; 153 } 154 } 155 156 reference::~reference() 157 { 158 if (nfields > 0) 159 ad_delete(nfields) field; 160 } 161 162 // ref is the inline, this is the database ref 163 164 void reference::merge(reference &ref) 165 { 166 int i; 167 for (i = 0; i < 256; i++) 168 if (field_index[i] != NULL_FIELD_INDEX) 169 temp_fields[i].move(field[field_index[i]]); 170 for (i = 0; i < 256; i++) 171 if (ref.field_index[i] != NULL_FIELD_INDEX) 172 temp_fields[i].move(ref.field[ref.field_index[i]]); 173 for (i = 0; i < 256; i++) 174 field_index[i] = NULL_FIELD_INDEX; 175 int old_nfields = nfields; 176 nfields = 0; 177 for (i = 0; i < 256; i++) 178 if (temp_fields[i].length() > 0) 179 nfields++; 180 if (nfields != old_nfields) { 181 if (old_nfields > 0) 182 ad_delete(old_nfields) field; 183 field = new string[nfields]; 184 } 185 int j = 0; 186 for (i = 0; i < 256; i++) 187 if (temp_fields[i].length() > 0) { 188 field[j].move(temp_fields[i]); 189 field_index[i] = j; 190 j++; 191 } 192 merged = 1; 193 } 194 195 void reference::insert_field(unsigned char c, string &s) 196 { 197 assert(s.length() > 0); 198 if (field_index[c] != NULL_FIELD_INDEX) { 199 field[field_index[c]].move(s); 200 return; 201 } 202 assert(field_index[c] == NULL_FIELD_INDEX); 203 string *old_field = field; 204 field = new string[nfields + 1]; 205 int pos = 0; 206 int i; 207 for (i = 0; i < int(c); i++) 208 if 
(field_index[i] != NULL_FIELD_INDEX) 209 pos++; 210 for (i = 0; i < pos; i++) 211 field[i].move(old_field[i]); 212 field[pos].move(s); 213 for (i = pos; i < nfields; i++) 214 field[i + 1].move(old_field[i]); 215 if (nfields > 0) 216 ad_delete(nfields) old_field; 217 nfields++; 218 field_index[c] = pos; 219 for (i = c + 1; i < 256; i++) 220 if (field_index[i] != NULL_FIELD_INDEX) 221 field_index[i] += 1; 222 } 223 224 void reference::delete_field(unsigned char c) 225 { 226 if (field_index[c] == NULL_FIELD_INDEX) 227 return; 228 string *old_field = field; 229 field = new string[nfields - 1]; 230 int i; 231 for (i = 0; i < int(field_index[c]); i++) 232 field[i].move(old_field[i]); 233 for (i = field_index[c]; i < nfields - 1; i++) 234 field[i].move(old_field[i + 1]); 235 if (nfields > 0) 236 ad_delete(nfields) old_field; 237 nfields--; 238 field_index[c] = NULL_FIELD_INDEX; 239 for (i = c + 1; i < 256; i++) 240 if (field_index[i] != NULL_FIELD_INDEX) 241 field_index[i] -= 1; 242 } 243 244 void reference::compute_hash_code() 245 { 246 if (!rid.is_null()) 247 h = rid.hash(); 248 else { 249 h = 0; 250 for (int i = 0; i < nfields; i++) 251 if (field[i].length() > 0) { 252 h <<= 4; 253 h ^= hash_string(field[i].contents(), field[i].length()); 254 } 255 } 256 } 257 258 void reference::set_number(int n) 259 { 260 no = n; 261 } 262 263 const char SORT_SEP = '\001'; 264 const char SORT_SUB_SEP = '\002'; 265 const char SORT_SUB_SUB_SEP = '\003'; 266 267 // sep specifies additional word separators 268 269 void sortify_words(const char *s, const char *end, const char *sep, 270 string &result) 271 { 272 int non_empty = 0; 273 int need_separator = 0; 274 for (;;) { 275 const char *token_start = s; 276 if (!get_token(&s, end)) 277 break; 278 if ((s - token_start == 1 279 && (*token_start == ' ' 280 || *token_start == '\n' 281 || (sep && *token_start != '\0' 282 && strchr(sep, *token_start) != 0))) 283 || (s - token_start == 2 284 && token_start[0] == '\\' && token_start[1] == ' ')) 
{ 285 if (non_empty) 286 need_separator = 1; 287 } 288 else { 289 const token_info *ti = lookup_token(token_start, s); 290 if (ti->sortify_non_empty(token_start, s)) { 291 if (need_separator) { 292 result += ' '; 293 need_separator = 0; 294 } 295 ti->sortify(token_start, s, result); 296 non_empty = 1; 297 } 298 } 299 } 300 } 301 302 void sortify_word(const char *s, const char *end, string &result) 303 { 304 for (;;) { 305 const char *token_start = s; 306 if (!get_token(&s, end)) 307 break; 308 const token_info *ti = lookup_token(token_start, s); 309 ti->sortify(token_start, s, result); 310 } 311 } 312 313 void sortify_other(const char *s, int len, string &key) 314 { 315 sortify_words(s, s + len, 0, key); 316 } 317 318 void sortify_title(const char *s, int len, string &key) 319 { 320 const char *end = s + len; 321 for (; s < end && (*s == ' ' || *s == '\n'); s++) 322 ; 323 const char *ptr = s; 324 for (;;) { 325 const char *token_start = ptr; 326 if (!get_token(&ptr, end)) 327 break; 328 if (ptr - token_start == 1 329 && (*token_start == ' ' || *token_start == '\n')) 330 break; 331 } 332 if (ptr < end) { 333 unsigned int first_word_len = ptr - s - 1; 334 const char *ae = articles.contents() + articles.length(); 335 for (const char *a = articles.contents(); 336 a < ae; 337 a = strchr(a, '\0') + 1) 338 if (first_word_len == strlen(a)) { 339 unsigned int j; 340 for (j = 0; j < first_word_len; j++) 341 if (a[j] != cmlower(s[j])) 342 break; 343 if (j >= first_word_len) { 344 s = ptr; 345 for (; s < end && (*s == ' ' || *s == '\n'); s++) 346 ; 347 break; 348 } 349 } 350 } 351 sortify_words(s, end, 0, key); 352 } 353 354 void sortify_name(const char *s, int len, string &key) 355 { 356 const char *last_name_end; 357 const char *last_name = find_last_name(s, s + len, &last_name_end); 358 sortify_word(last_name, last_name_end, key); 359 key += SORT_SUB_SUB_SEP; 360 if (last_name > s) 361 sortify_words(s, last_name, ".", key); 362 key += SORT_SUB_SUB_SEP; 363 if (last_name_end 
< s + len) 364 sortify_words(last_name_end, s + len, ".,", key); 365 } 366 367 void sortify_date(const char *s, int len, string &key) 368 { 369 const char *year_end; 370 const char *year_start = find_year(s, s + len, &year_end); 371 if (!year_start) { 372 // Things without years are often `forthcoming', so it makes sense 373 // that they sort after things with explicit years. 374 key += 'A'; 375 sortify_words(s, s + len, 0, key); 376 return; 377 } 378 int n = year_end - year_start; 379 while (n < 4) { 380 key += '0'; 381 n++; 382 } 383 while (year_start < year_end) 384 key += *year_start++; 385 int m = find_month(s, s + len); 386 if (m < 0) 387 return; 388 key += 'A' + m; 389 const char *day_end; 390 const char *day_start = find_day(s, s + len, &day_end); 391 if (!day_start) 392 return; 393 if (day_end - day_start == 1) 394 key += '0'; 395 while (day_start < day_end) 396 key += *day_start++; 397 } 398 399 // SORT_{SUB,SUB_SUB}_SEP can creep in from use of @ in label specification. 400 401 void sortify_label(const char *s, int len, string &key) 402 { 403 const char *end = s + len; 404 for (;;) { 405 const char *ptr; 406 for (ptr = s; 407 ptr < end && *ptr != SORT_SUB_SEP && *ptr != SORT_SUB_SUB_SEP; 408 ptr++) 409 ; 410 if (ptr > s) 411 sortify_words(s, ptr, 0, key); 412 s = ptr; 413 if (s >= end) 414 break; 415 key += *s++; 416 } 417 } 418 419 void reference::compute_sort_key() 420 { 421 if (sort_fields.length() == 0) 422 return; 423 sort_fields += '\0'; 424 const char *sf = sort_fields.contents(); 425 while (*sf != '\0') { 426 sort_key += SORT_SEP; 427 char f = *sf++; 428 int n = 1; 429 if (*sf == '+') { 430 n = INT_MAX; 431 sf++; 432 } 433 else if (csdigit(*sf)) { 434 char *ptr; 435 long l = strtol(sf, &ptr, 10); 436 if (l == 0 && ptr == sf) 437 ; 438 else { 439 sf = ptr; 440 if (l < 0) { 441 n = 1; 442 } 443 else { 444 n = int(l); 445 } 446 } 447 } 448 if (f == '.') 449 sortify_label(label.contents(), label.length(), sort_key); 450 else if (f == 
AUTHOR_FIELDS[0]) 451 sortify_authors(n, sort_key); 452 else 453 sortify_field(f, n, sort_key); 454 } 455 sort_fields.set_length(sort_fields.length() - 1); 456 } 457 458 void reference::sortify_authors(int n, string &result) const 459 { 460 for (const char *p = AUTHOR_FIELDS; *p != '\0'; p++) 461 if (contains_field(*p)) { 462 sortify_field(*p, n, result); 463 return; 464 } 465 sortify_field(AUTHOR_FIELDS[0], n, result); 466 } 467 468 void reference::canonicalize_authors(string &result) const 469 { 470 int len = result.length(); 471 sortify_authors(INT_MAX, result); 472 if (result.length() > len) 473 result += SORT_SUB_SEP; 474 } 475 476 void reference::sortify_field(unsigned char f, int n, string &result) const 477 { 478 typedef void (*sortify_t)(const char *, int, string &); 479 sortify_t sortifier = sortify_other; 480 switch (f) { 481 case 'A': 482 case 'E': 483 sortifier = sortify_name; 484 break; 485 case 'D': 486 sortifier = sortify_date; 487 break; 488 case 'B': 489 case 'J': 490 case 'T': 491 sortifier = sortify_title; 492 break; 493 } 494 int fi = field_index[(unsigned char)f]; 495 if (fi != NULL_FIELD_INDEX) { 496 string &str = field[fi]; 497 const char *start = str.contents(); 498 const char *end = start + str.length(); 499 for (int i = 0; i < n && start < end; i++) { 500 const char *p = start; 501 while (start < end && *start != FIELD_SEPARATOR) 502 start++; 503 if (i > 0) 504 result += SORT_SUB_SEP; 505 (*sortifier)(p, start - p, result); 506 if (start < end) 507 start++; 508 } 509 } 510 } 511 512 int compare_reference(const reference &r1, const reference &r2) 513 { 514 assert(r1.no >= 0); 515 assert(r2.no >= 0); 516 const char *s1 = r1.sort_key.contents(); 517 int n1 = r1.sort_key.length(); 518 const char *s2 = r2.sort_key.contents(); 519 int n2 = r2.sort_key.length(); 520 for (; n1 > 0 && n2 > 0; --n1, --n2, ++s1, ++s2) 521 if (*s1 != *s2) 522 return (int)(unsigned char)*s1 - (int)(unsigned char)*s2; 523 if (n2 > 0) 524 return -1; 525 if (n1 > 0) 526 
return 1; 527 return r1.no - r2.no; 528 } 529 530 int same_reference(const reference &r1, const reference &r2) 531 { 532 if (!r1.rid.is_null() && r1.rid == r2.rid) 533 return 1; 534 if (r1.h != r2.h) 535 return 0; 536 if (r1.nfields != r2.nfields) 537 return 0; 538 int i = 0; 539 for (i = 0; i < 256; i++) 540 if (r1.field_index != r2.field_index) 541 return 0; 542 for (i = 0; i < r1.nfields; i++) 543 if (r1.field[i] != r2.field[i]) 544 return 0; 545 return 1; 546 } 547 548 const char *find_last_name(const char *start, const char *end, 549 const char **endp) 550 { 551 const char *ptr = start; 552 const char *last_word = start; 553 for (;;) { 554 const char *token_start = ptr; 555 if (!get_token(&ptr, end)) 556 break; 557 if (ptr - token_start == 1) { 558 if (*token_start == ',') { 559 *endp = token_start; 560 return last_word; 561 } 562 else if (*token_start == ' ' || *token_start == '\n') { 563 if (ptr < end && *ptr != ' ' && *ptr != '\n') 564 last_word = ptr; 565 } 566 } 567 } 568 *endp = end; 569 return last_word; 570 } 571 572 void abbreviate_name(const char *ptr, const char *end, string &result) 573 { 574 const char *last_name_end; 575 const char *last_name_start = find_last_name(ptr, end, &last_name_end); 576 int need_period = 0; 577 for (;;) { 578 const char *token_start = ptr; 579 if (!get_token(&ptr, last_name_start)) 580 break; 581 const token_info *ti = lookup_token(token_start, ptr); 582 if (need_period) { 583 if ((ptr - token_start == 1 && *token_start == ' ') 584 || (ptr - token_start == 2 && token_start[0] == '\\' 585 && token_start[1] == ' ')) 586 continue; 587 if (ti->is_upper()) 588 result += period_before_initial; 589 else 590 result += period_before_other; 591 need_period = 0; 592 } 593 result.append(token_start, ptr - token_start); 594 if (ti->is_upper()) { 595 const char *lower_ptr = ptr; 596 int first_token = 1; 597 for (;;) { 598 token_start = ptr; 599 if (!get_token(&ptr, last_name_start)) 600 break; 601 if ((ptr - token_start == 1 && 
*token_start == ' ') 602 || (ptr - token_start == 2 && token_start[0] == '\\' 603 && token_start[1] == ' ')) 604 break; 605 ti = lookup_token(token_start, ptr); 606 if (ti->is_hyphen()) { 607 const char *ptr1 = ptr; 608 if (get_token(&ptr1, last_name_start)) { 609 ti = lookup_token(ptr, ptr1); 610 if (ti->is_upper()) { 611 result += period_before_hyphen; 612 result.append(token_start, ptr1 - token_start); 613 ptr = ptr1; 614 } 615 } 616 } 617 else if (ti->is_upper()) { 618 // MacDougal -> MacD. 619 result.append(lower_ptr, ptr - lower_ptr); 620 lower_ptr = ptr; 621 first_token = 1; 622 } 623 else if (first_token && ti->is_accent()) { 624 result.append(token_start, ptr - token_start); 625 lower_ptr = ptr; 626 } 627 first_token = 0; 628 } 629 need_period = 1; 630 } 631 } 632 if (need_period) 633 result += period_before_last_name; 634 result.append(last_name_start, end - last_name_start); 635 } 636 637 static void abbreviate_names(string &result) 638 { 639 string str; 640 str.move(result); 641 const char *ptr = str.contents(); 642 const char *end = ptr + str.length(); 643 while (ptr < end) { 644 const char *name_end = (char *)memchr(ptr, FIELD_SEPARATOR, end - ptr); 645 if (name_end == 0) 646 name_end = end; 647 abbreviate_name(ptr, name_end, result); 648 if (name_end >= end) 649 break; 650 ptr = name_end + 1; 651 result += FIELD_SEPARATOR; 652 } 653 } 654 655 void reverse_name(const char *ptr, const char *name_end, string &result) 656 { 657 const char *last_name_end; 658 const char *last_name_start = find_last_name(ptr, name_end, &last_name_end); 659 result.append(last_name_start, last_name_end - last_name_start); 660 while (last_name_start > ptr 661 && (last_name_start[-1] == ' ' || last_name_start[-1] == '\n')) 662 last_name_start--; 663 if (last_name_start > ptr) { 664 result += ", "; 665 result.append(ptr, last_name_start - ptr); 666 } 667 if (last_name_end < name_end) 668 result.append(last_name_end, name_end - last_name_end); 669 } 670 671 void 
reverse_names(string &result, int n) 672 { 673 if (n <= 0) 674 return; 675 string str; 676 str.move(result); 677 const char *ptr = str.contents(); 678 const char *end = ptr + str.length(); 679 while (ptr < end) { 680 if (--n < 0) { 681 result.append(ptr, end - ptr); 682 break; 683 } 684 const char *name_end = (char *)memchr(ptr, FIELD_SEPARATOR, end - ptr); 685 if (name_end == 0) 686 name_end = end; 687 reverse_name(ptr, name_end, result); 688 if (name_end >= end) 689 break; 690 ptr = name_end + 1; 691 result += FIELD_SEPARATOR; 692 } 693 } 694 695 // Return number of field separators. 696 697 int join_fields(string &f) 698 { 699 const char *ptr = f.contents(); 700 int len = f.length(); 701 int nfield_seps = 0; 702 int j; 703 for (j = 0; j < len; j++) 704 if (ptr[j] == FIELD_SEPARATOR) 705 nfield_seps++; 706 if (nfield_seps == 0) 707 return 0; 708 string temp; 709 int field_seps_left = nfield_seps; 710 for (j = 0; j < len; j++) { 711 if (ptr[j] == FIELD_SEPARATOR) { 712 if (nfield_seps == 1) 713 temp += join_authors_exactly_two; 714 else if (--field_seps_left == 0) 715 temp += join_authors_last_two; 716 else 717 temp += join_authors_default; 718 } 719 else 720 temp += ptr[j]; 721 } 722 f = temp; 723 return nfield_seps; 724 } 725 726 void uppercase(const char *start, const char *end, string &result) 727 { 728 for (;;) { 729 const char *token_start = start; 730 if (!get_token(&start, end)) 731 break; 732 const token_info *ti = lookup_token(token_start, start); 733 ti->upper_case(token_start, start, result); 734 } 735 } 736 737 void lowercase(const char *start, const char *end, string &result) 738 { 739 for (;;) { 740 const char *token_start = start; 741 if (!get_token(&start, end)) 742 break; 743 const token_info *ti = lookup_token(token_start, start); 744 ti->lower_case(token_start, start, result); 745 } 746 } 747 748 void capitalize(const char *ptr, const char *end, string &result) 749 { 750 int in_small_point_size = 0; 751 for (;;) { 752 const char *start = ptr; 
753 if (!get_token(&ptr, end)) 754 break; 755 const token_info *ti = lookup_token(start, ptr); 756 const char *char_end = ptr; 757 int is_lower = ti->is_lower(); 758 if ((is_lower || ti->is_upper()) && get_token(&ptr, end)) { 759 const token_info *ti2 = lookup_token(char_end, ptr); 760 if (!ti2->is_accent()) 761 ptr = char_end; 762 } 763 if (is_lower) { 764 if (!in_small_point_size) { 765 result += "\\s-2"; 766 in_small_point_size = 1; 767 } 768 ti->upper_case(start, char_end, result); 769 result.append(char_end, ptr - char_end); 770 } 771 else { 772 if (in_small_point_size) { 773 result += "\\s+2"; 774 in_small_point_size = 0; 775 } 776 result.append(start, ptr - start); 777 } 778 } 779 if (in_small_point_size) 780 result += "\\s+2"; 781 } 782 783 void capitalize_field(string &str) 784 { 785 string temp; 786 capitalize(str.contents(), str.contents() + str.length(), temp); 787 str.move(temp); 788 } 789 790 int is_terminated(const char *ptr, const char *end) 791 { 792 const char *last_token = end; 793 for (;;) { 794 const char *p = ptr; 795 if (!get_token(&ptr, end)) 796 break; 797 last_token = p; 798 } 799 return end - last_token == 1 800 && (*last_token == '.' || *last_token == '!' 
|| *last_token == '?'); 801 } 802 803 void reference::output(FILE *fp) 804 { 805 fputs(".]-\n", fp); 806 for (int i = 0; i < 256; i++) 807 if (field_index[i] != NULL_FIELD_INDEX && i != annotation_field) { 808 string &f = field[field_index[i]]; 809 if (!csdigit(i)) { 810 int j = reverse_fields.search(i); 811 if (j >= 0) { 812 int n; 813 int len = reverse_fields.length(); 814 if (++j < len && csdigit(reverse_fields[j])) { 815 n = reverse_fields[j] - '0'; 816 for (++j; j < len && csdigit(reverse_fields[j]); j++) 817 // should check for overflow 818 n = n*10 + reverse_fields[j] - '0'; 819 } 820 else 821 n = INT_MAX; 822 reverse_names(f, n); 823 } 824 } 825 int is_multiple = join_fields(f) > 0; 826 if (capitalize_fields.search(i) >= 0) 827 capitalize_field(f); 828 if (memchr(f.contents(), '\n', f.length()) == 0) { 829 fprintf(fp, ".ds [%c ", i); 830 if (f[0] == ' ' || f[0] == '\\' || f[0] == '"') 831 putc('"', fp); 832 put_string(f, fp); 833 putc('\n', fp); 834 } 835 else { 836 fprintf(fp, ".de [%c\n", i); 837 put_string(f, fp); 838 fputs("..\n", fp); 839 } 840 if (i == 'P') { 841 int multiple_pages = 0; 842 const char *s = f.contents(); 843 const char *end = f.contents() + f.length(); 844 for (;;) { 845 const char *token_start = s; 846 if (!get_token(&s, end)) 847 break; 848 const token_info *ti = lookup_token(token_start, s); 849 if (ti->is_hyphen() || ti->is_range_sep()) { 850 multiple_pages = 1; 851 break; 852 } 853 } 854 fprintf(fp, ".nr [P %d\n", multiple_pages); 855 } 856 else if (i == 'E') 857 fprintf(fp, ".nr [E %d\n", is_multiple); 858 } 859 for (const char *p = "TAO"; *p; p++) { 860 int fi = field_index[(unsigned char)*p]; 861 if (fi != NULL_FIELD_INDEX) { 862 string &f = field[fi]; 863 fprintf(fp, ".nr [%c %d\n", *p, 864 is_terminated(f.contents(), f.contents() + f.length())); 865 } 866 } 867 int t = classify(); 868 fprintf(fp, ".][ %d %s\n", t, reference_types[t]); 869 if (annotation_macro.length() > 0 && annotation_field >= 0 870 && 
field_index[annotation_field] != NULL_FIELD_INDEX) { 871 putc('.', fp); 872 put_string(annotation_macro, fp); 873 putc('\n', fp); 874 put_string(field[field_index[annotation_field]], fp); 875 } 876 } 877 878 void reference::print_sort_key_comment(FILE *fp) 879 { 880 fputs(".\\\"", fp); 881 put_string(sort_key, fp); 882 putc('\n', fp); 883 } 884 885 const char *find_year(const char *start, const char *end, const char **endp) 886 { 887 for (;;) { 888 while (start < end && !csdigit(*start)) 889 start++; 890 const char *ptr = start; 891 if (start == end) 892 break; 893 while (ptr < end && csdigit(*ptr)) 894 ptr++; 895 if (ptr - start == 4 || ptr - start == 3 896 || (ptr - start == 2 897 && (start[0] >= '4' || (start[0] == '3' && start[1] >= '2')))) { 898 *endp = ptr; 899 return start; 900 } 901 start = ptr; 902 } 903 return 0; 904 } 905 906 static const char *find_day(const char *start, const char *end, 907 const char **endp) 908 { 909 for (;;) { 910 while (start < end && !csdigit(*start)) 911 start++; 912 const char *ptr = start; 913 if (start == end) 914 break; 915 while (ptr < end && csdigit(*ptr)) 916 ptr++; 917 if ((ptr - start == 1 && start[0] != '0') 918 || (ptr - start == 2 && 919 (start[0] == '1' 920 || start[0] == '2' 921 || (start[0] == '3' && start[1] <= '1') 922 || (start[0] == '0' && start[1] != '0')))) { 923 *endp = ptr; 924 return start; 925 } 926 start = ptr; 927 } 928 return 0; 929 } 930 931 static int find_month(const char *start, const char *end) 932 { 933 static const char *months[] = { 934 "january", 935 "february", 936 "march", 937 "april", 938 "may", 939 "june", 940 "july", 941 "august", 942 "september", 943 "october", 944 "november", 945 "december", 946 }; 947 for (;;) { 948 while (start < end && !csalpha(*start)) 949 start++; 950 const char *ptr = start; 951 if (start == end) 952 break; 953 while (ptr < end && csalpha(*ptr)) 954 ptr++; 955 if (ptr - start >= 3) { 956 for (unsigned int i = 0; i < sizeof(months)/sizeof(months[0]); i++) { 957 
const char *q = months[i]; 958 const char *p = start; 959 for (; p < ptr; p++, q++) 960 if (cmlower(*p) != *q) 961 break; 962 if (p >= ptr) 963 return i; 964 } 965 } 966 start = ptr; 967 } 968 return -1; 969 } 970 971 int reference::contains_field(char c) const 972 { 973 return field_index[(unsigned char)c] != NULL_FIELD_INDEX; 974 } 975 976 int reference::classify() 977 { 978 if (contains_field('J')) 979 return JOURNAL_ARTICLE; 980 if (contains_field('B')) 981 return ARTICLE_IN_BOOK; 982 if (contains_field('G')) 983 return TECH_REPORT; 984 if (contains_field('R')) 985 return TECH_REPORT; 986 if (contains_field('I')) 987 return BOOK; 988 if (contains_field('M')) 989 return BELL_TM; 990 return OTHER; 991 } 992 993 const char *reference::get_year(const char **endp) const 994 { 995 if (field_index['D'] != NULL_FIELD_INDEX) { 996 string &date = field[field_index['D']]; 997 const char *start = date.contents(); 998 const char *end = start + date.length(); 999 return find_year(start, end, endp); 1000 } 1001 else 1002 return 0; 1003 } 1004 1005 const char *reference::get_field(unsigned char c, const char **endp) const 1006 { 1007 if (field_index[c] != NULL_FIELD_INDEX) { 1008 string &f = field[field_index[c]]; 1009 const char *start = f.contents(); 1010 *endp = start + f.length(); 1011 return start; 1012 } 1013 else 1014 return 0; 1015 } 1016 1017 const char *reference::get_date(const char **endp) const 1018 { 1019 return get_field('D', endp); 1020 } 1021 1022 const char *nth_field(int i, const char *start, const char **endp) 1023 { 1024 while (--i >= 0) { 1025 start = (char *)memchr(start, FIELD_SEPARATOR, *endp - start); 1026 if (!start) 1027 return 0; 1028 start++; 1029 } 1030 const char *e = (char *)memchr(start, FIELD_SEPARATOR, *endp - start); 1031 if (e) 1032 *endp = e; 1033 return start; 1034 } 1035 1036 const char *reference::get_author(int i, const char **endp) const 1037 { 1038 for (const char *f = AUTHOR_FIELDS; *f != '\0'; f++) { 1039 const char *start = 
get_field(*f, endp); 1040 if (start) { 1041 if (strchr(MULTI_FIELD_NAMES, *f) != 0) 1042 return nth_field(i, start, endp); 1043 else if (i == 0) 1044 return start; 1045 else 1046 return 0; 1047 } 1048 } 1049 return 0; 1050 } 1051 1052 const char *reference::get_author_last_name(int i, const char **endp) const 1053 { 1054 for (const char *f = AUTHOR_FIELDS; *f != '\0'; f++) { 1055 const char *start = get_field(*f, endp); 1056 if (start) { 1057 if (strchr(MULTI_FIELD_NAMES, *f) != 0) { 1058 start = nth_field(i, start, endp); 1059 if (!start) 1060 return 0; 1061 } 1062 if (*f == 'A') 1063 return find_last_name(start, *endp, endp); 1064 else 1065 return start; 1066 } 1067 } 1068 return 0; 1069 } 1070 1071 void reference::set_date(string &d) 1072 { 1073 if (d.length() == 0) 1074 delete_field('D'); 1075 else 1076 insert_field('D', d); 1077 } 1078 1079 int same_year(const reference &r1, const reference &r2) 1080 { 1081 const char *ye1; 1082 const char *ys1 = r1.get_year(&ye1); 1083 const char *ye2; 1084 const char *ys2 = r2.get_year(&ye2); 1085 if (ys1 == 0) { 1086 if (ys2 == 0) 1087 return same_date(r1, r2); 1088 else 1089 return 0; 1090 } 1091 else if (ys2 == 0) 1092 return 0; 1093 else if (ye1 - ys1 != ye2 - ys2) 1094 return 0; 1095 else 1096 return memcmp(ys1, ys2, ye1 - ys1) == 0; 1097 } 1098 1099 int same_date(const reference &r1, const reference &r2) 1100 { 1101 const char *e1; 1102 const char *s1 = r1.get_date(&e1); 1103 const char *e2; 1104 const char *s2 = r2.get_date(&e2); 1105 if (s1 == 0) 1106 return s2 == 0; 1107 else if (s2 == 0) 1108 return 0; 1109 else if (e1 - s1 != e2 - s2) 1110 return 0; 1111 else 1112 return memcmp(s1, s2, e1 - s1) == 0; 1113 } 1114 1115 const char *reference::get_sort_field(int i, int si, int ssi, 1116 const char **endp) const 1117 { 1118 const char *start = sort_key.contents(); 1119 const char *end = start + sort_key.length(); 1120 if (i < 0) { 1121 *endp = end; 1122 return start; 1123 } 1124 while (--i >= 0) { 1125 start = (char 
*)memchr(start, SORT_SEP, end - start); 1126 if (!start) 1127 return 0; 1128 start++; 1129 } 1130 const char *e = (char *)memchr(start, SORT_SEP, end - start); 1131 if (e) 1132 end = e; 1133 if (si < 0) { 1134 *endp = end; 1135 return start; 1136 } 1137 while (--si >= 0) { 1138 start = (char *)memchr(start, SORT_SUB_SEP, end - start); 1139 if (!start) 1140 return 0; 1141 start++; 1142 } 1143 e = (char *)memchr(start, SORT_SUB_SEP, end - start); 1144 if (e) 1145 end = e; 1146 if (ssi < 0) { 1147 *endp = end; 1148 return start; 1149 } 1150 while (--ssi >= 0) { 1151 start = (char *)memchr(start, SORT_SUB_SUB_SEP, end - start); 1152 if (!start) 1153 return 0; 1154 start++; 1155 } 1156 e = (char *)memchr(start, SORT_SUB_SUB_SEP, end - start); 1157 if (e) 1158 end = e; 1159 *endp = end; 1160 return start; 1161 } 1162 1163