112262Stut #ifndef lint 2*47074Sdonn static char *sccsid = "@(#)sortbib.c 4.5 (Berkeley) 03/07/91"; 312262Stut #endif 412262Stut 512262Stut #include <stdio.h> 612262Stut #include <signal.h> 7*47074Sdonn #include <string.h> 837895Sbostic #include "pathnames.h" 937895Sbostic 1012262Stut #define BUF BUFSIZ 1112262Stut #define MXFILES 16 1212262Stut 1312262Stut char *tempfile; /* temporary file for sorting keys */ 1412262Stut char *keystr = "AD"; /* default sorting on author and date */ 1512262Stut int multauth = 0; /* by default sort on senior author only */ 1612262Stut int oneauth; /* has there been author in the record? */ 1712262Stut 1812262Stut main(argc, argv) /* sortbib: sort bibliographic database in place */ 1912262Stut int argc; 2012262Stut char *argv[]; 2112262Stut { 2212262Stut FILE *fp[MXFILES], *tfp, *fopen(); 2346849Sbostic int i; 2446849Sbostic void onintr(); 2512262Stut char *mktemp(); 2612262Stut 2712262Stut if (argc == 1) /* can't use stdin for seeking anyway */ 2812262Stut { 2912262Stut puts("Usage: sortbib [-sKEYS] database [...]"); 3012262Stut puts("\t-s: sort by fields in KEYS (default is AD)"); 3112262Stut exit(1); 3212262Stut } 3312262Stut if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's') 3412262Stut { 3512262Stut keystr = argv[1]+2; 3612262Stut eval(keystr); /* evaluate A+ for multiple authors */ 3712262Stut argv++; argc--; 3812262Stut } 3912262Stut if (argc > MXFILES+1) /* too many open file streams */ 4012262Stut { 4112262Stut fprintf(stderr, 4212262Stut "sortbib: More than %d databases specified\n", MXFILES); 4312262Stut exit(1); 4412262Stut } 4512262Stut for (i = 1; i < argc; i++) /* open files in arg list */ 4612262Stut if ((fp[i-1] = fopen(argv[i], "r")) == NULL) 4712262Stut error(argv[i]); 48*47074Sdonn tempfile = mktemp(strdup(_PATH_TMPS)); /* tempfile for sorting keys */ 4912262Stut if (signal(SIGINT,SIG_IGN) != SIG_IGN) /* remove if interrupted */ 5012262Stut signal(SIGINT, onintr); 5112262Stut if ((tfp = fopen(tempfile, "w")) == NULL) 5212262Stut error(tempfile); 5312262Stut for (i = 0; i < argc-1; i++) /* read keys from bib files */ 5412262Stut sortbib(fp[i], tfp, i); 5512262Stut fclose(tfp); 5612262Stut deliver(fp, tfp); /* do disk seeks and read from biblio files */ 5712262Stut unlink(tempfile); 5812262Stut exit(0); 5912262Stut } 6012262Stut 6112262Stut int rsmode = 0; /* record separator: 1 = null line, 2 = bracket */ 6212262Stut 6312262Stut sortbib(fp, tfp, i) /* read records, prepare list for sorting */ 6412262Stut FILE *fp, *tfp; 6512262Stut int i; 6612262Stut { 6712262Stut long offset, lastoffset = 0, ftell(); /* byte offsets in file */ 6812262Stut int length, newrec, recno = 0; /* reclen, new rec'd?, number */ 6912262Stut char line[BUF], fld[4][BUF]; /* one line, the sort fields */ 7012262Stut 7112262Stut /* measure byte offset, then get new line */ 7212262Stut while (offset = ftell(fp), fgets(line, BUF, fp)) 7312262Stut { 7412262Stut if (recno == 0) /* accept record w/o initial newline */ 7512262Stut newrec = 1; 7612262Stut if (line[0] == '\n') /* accept null line record separator */ 7712262Stut { 7812262Stut if (!rsmode) 7912262Stut rsmode = 1; /* null line mode */ 8012262Stut if (rsmode == 1) 8112262Stut newrec = 1; 8212262Stut } 8312262Stut if (line[0] == '.' && line[1] == '[') /* also accept .[ .] */ 8412262Stut { 8512262Stut if (!rsmode) 8612262Stut rsmode = 2; /* bracket pair mode */ 8712262Stut if (rsmode == 2) 8812262Stut newrec = 1; 8912262Stut } 9012262Stut if (newrec) /* by whatever means above */ 9112262Stut { 9212262Stut newrec = 0; 9312262Stut length = offset - lastoffset; /* measure rec len */ 9412262Stut if (length > BUF*8) 9512262Stut { 9612262Stut fprintf(stderr, 9712262Stut "sortbib: record %d longer than %d (%d)\n", 9812262Stut recno, BUF*8, length); 9912262Stut exit(1); 10012262Stut } 10112262Stut if (recno++) /* info for sorting */ 10212262Stut { 10312262Stut fprintf(tfp, "%d %D %d : %s %s %s %s\n", 10412262Stut i, lastoffset, length, 10512262Stut fld[0], fld[1], fld[2], fld[3]); 10612262Stut if (ferror(tfp)) 10712262Stut error(tempfile); 10812262Stut } 10912262Stut *fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL; 11012262Stut oneauth = 0; /* reset number of authors */ 11112262Stut lastoffset = offset; /* save for next time */ 11212262Stut } 11312262Stut if (line[0] == '%') /* parse out fields to be sorted */ 11412262Stut parse(line, fld); 11512262Stut } 11612262Stut offset = ftell(fp); /* measure byte offset at EOF */ 11712262Stut length = offset - lastoffset; /* measure final record length */ 11812262Stut if (length > BUF*8) 11912262Stut { 12012262Stut fprintf(stderr, "sortbib: record %d longer than %d (%d)\n", 12112262Stut recno, BUF*8, length); 12212262Stut exit(1); 12312262Stut } 12412262Stut if (line[0] != '\n') /* ignore null line just before EOF */ 12512262Stut { 12612262Stut fprintf(tfp, "%d %D %d : %s %s %s %s\n", 12712262Stut i, lastoffset, length, 12812262Stut fld[0], fld[1], fld[2], fld[3]); 12912262Stut if (ferror(tfp)) 13012262Stut error(tempfile); /* disk error in /tmp */ 13112262Stut } 13212262Stut } 13312262Stut 13412262Stut deliver(fp, tfp) /* deliver sorted entries out of database(s) */ 13512262Stut FILE *fp[], *tfp; 13612262Stut { 13712262Stut char str[BUF], buff[BUF*8]; /* for tempfile & databases */ 13832508Sbostic char cmd[80]; /* for using system sort command */ 13912262Stut long int offset; 14012262Stut int i, length; 14112262Stut 14212262Stut /* when sorting, ignore case distinctions; tab char is ':' */ 14312262Stut sprintf(cmd, "sort -ft: +1 %s -o %s", tempfile, tempfile); 14412262Stut if (system(cmd) == 127) 14512262Stut error("sortbib"); 14612262Stut tfp = fopen(tempfile, "r"); 14712262Stut while (fgets(str, sizeof(str), tfp)) 14812262Stut { 14912262Stut /* get file pointer, record offset, and length */ 15012262Stut if (sscanf(str, "%d %D %d :", &i, &offset, &length) != 3) 15112262Stut error("sortbib: sorting error"); 15212262Stut /* seek to proper disk location in proper file */ 15312262Stut if (fseek(fp[i], offset, 0) == -1) 15412262Stut error("sortbib"); 15512262Stut /* read exactly one record from bibliography */ 15612262Stut if (fread(buff, sizeof(*buff), length, fp[i]) == 0) 15712262Stut error("sortbib"); 15812262Stut /* add newline between unseparated records */ 15912262Stut if (buff[0] != '\n' && rsmode == 1) 16012262Stut putchar('\n'); 16112262Stut /* write record buffer to standard output */ 16212262Stut if (fwrite(buff, sizeof(*buff), length, stdout) == 0) 16312262Stut error("sortbib"); 16412262Stut } 16512262Stut } 16612262Stut 16712262Stut parse(line, fld) /* get fields out of line, prepare for sorting */ 16812262Stut char line[]; 16912262Stut char fld[][BUF]; 17012262Stut { 17112262Stut char wd[8][BUF/4], *strcat(); 17212262Stut int n, i, j; 17312262Stut 17412262Stut for (i = 0; i < 8; i++) /* zap out old strings */ 17512262Stut *wd[i] = NULL; 17612262Stut n = sscanf(line, "%s %s %s %s %s %s %s %s", 17712262Stut wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]); 17812262Stut for (i = 0; i < 4; i++) 17912262Stut { 18012262Stut if (wd[0][1] == keystr[i]) 18112262Stut { 18212262Stut if (wd[0][1] == 'A') 18312262Stut { 18412262Stut if (oneauth && !multauth) /* no repeat */ 18512262Stut break; 18612262Stut else if (oneauth) /* mult auths */ 18712262Stut strcat(fld[i], "~~"); 18812262Stut if (!endcomma(wd[n-2])) /* surname */ 18912262Stut strcat(fld[i], wd[n-1]); 19012262Stut else { /* jr. or ed. */ 19112262Stut strcat(fld[i], wd[n-2]); 19212262Stut n--; 19312262Stut } 19412262Stut strcat(fld[i], " "); 19512262Stut for (j = 1; j < n-1; j++) 19612262Stut strcat(fld[i], wd[j]); 19712262Stut oneauth = 1; 19812262Stut } 19912262Stut else if (wd[0][1] == 'D') 20012262Stut { 20112262Stut strcat(fld[i], wd[n-1]); /* year */ 20212262Stut if (n > 2) 20312262Stut strcat(fld[i], wd[1]); /* month */ 20412262Stut } 20512262Stut else if (wd[0][1] == 'T' || wd[0][1] == 'J') 20612262Stut { 20712262Stut j = 1; 20812262Stut if (article(wd[1])) /* skip article */ 20912262Stut j++; 21012262Stut for (; j < n; j++) 21112262Stut strcat(fld[i], wd[j]); 21212262Stut } 21312262Stut else /* any other field */ 21412262Stut for (j = 1; j < n; j++) 21512262Stut strcat(fld[i], wd[j]); 21612262Stut } 21712262Stut /* %Q quorporate or queer author - unreversed %A */ 21812262Stut else if (wd[0][1] == 'Q' && keystr[i] == 'A') 21912262Stut for (j = 1; j < n; j++) 22012262Stut strcat(fld[i], wd[j]); 22112262Stut } 22212262Stut } 22312262Stut 22412262Stut article(str) /* see if string contains an article */ 22512262Stut char *str; 22612262Stut { 22712262Stut if (strcmp("The", str) == 0) /* English */ 22812262Stut return(1); 22912262Stut if (strcmp("A", str) == 0) 23012262Stut return(1); 23112262Stut if (strcmp("An", str) == 0) 23212262Stut return(1); 23312262Stut if (strcmp("Le", str) == 0) /* French */ 23412262Stut return(1); 23512262Stut if (strcmp("La", str) == 0) 23612262Stut return(1); 23712262Stut if (strcmp("Der", str) == 0) /* German */ 23812262Stut return(1); 23912262Stut if (strcmp("Die", str) == 0) 24012262Stut return(1); 24112262Stut if (strcmp("Das", str) == 0) 24212262Stut return(1); 24312262Stut if (strcmp("El", str) == 0) /* Spanish */ 24412262Stut return(1); 24512262Stut if (strcmp("Den", str) == 0) /* Scandinavian */ 24612262Stut return(1); 24712262Stut return(0); 24812262Stut } 24912262Stut 25012262Stut eval(keystr) /* evaluate key string for A+ marking */ 25112262Stut char keystr[]; 25212262Stut { 25312262Stut int i, j; 25412262Stut 25512262Stut for (i = 0, j = 0; keystr[i]; i++, j++) 25612262Stut { 25712262Stut if (keystr[i] == '+') 25812262Stut { 25912262Stut multauth = 1; 26012262Stut i++; 26112262Stut } 26212262Stut keystr[j] = keystr[i]; 26312262Stut } 26412262Stut keystr[j] = NULL; 26512262Stut } 26612262Stut 26712262Stut error(s) /* exit in case of various system errors */ 26812262Stut char *s; 26912262Stut { 27012262Stut perror(s); 27112262Stut exit(1); 27212262Stut } 27312262Stut 27446849Sbostic void 27512262Stut onintr() /* remove tempfile in case of interrupt */ 27612262Stut { 27712262Stut fprintf(stderr, "\nInterrupt\n"); 27812262Stut unlink(tempfile); 27912262Stut exit(1); 28012262Stut } 28112262Stut 28212262Stut endcomma(str) 28312262Stut char *str; 28412262Stut { 28512262Stut int n; 28612262Stut 28712262Stut n = strlen(str) - 1; 28812262Stut if (str[n] == ',') 28912262Stut { 29012262Stut str[n] = NULL; 29112262Stut return(1); 29212262Stut } 29312262Stut return(0); 29412262Stut } 295