1*48301Sbostic /*-
2*48301Sbostic * %sccs.include.proprietary.c%
3*48301Sbostic */
4*48301Sbostic
512262Stut #ifndef lint
6*48301Sbostic static char sccsid[] = "@(#)sortbib.c 4.6 (Berkeley) 04/18/91";
7*48301Sbostic #endif /* not lint */
812262Stut
#include <ctype.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "pathnames.h"
1337895Sbostic
1412262Stut #define BUF BUFSIZ /* size of one input line and of one sort field */
1512262Stut #define MXFILES 16 /* most databases accepted on the command line */
1612262Stut
1712262Stut char *tempfile; /* name of the temporary file holding the sort keys */
1812262Stut char *keystr = "AD"; /* key letters to sort on; default is author, then date */
1912262Stut int multauth = 0; /* set by eval() when the key string contains '+' (all authors) */
2012262Stut int oneauth; /* nonzero once parse() has taken an author for the current record */
2112262Stut
main(argc,argv)2212262Stut main(argc, argv) /* sortbib: sort bibliographic database in place */
2312262Stut int argc;
2412262Stut char *argv[];
2512262Stut {
2612262Stut FILE *fp[MXFILES], *tfp, *fopen();
2746849Sbostic int i;
2846849Sbostic void onintr();
2912262Stut char *mktemp();
3012262Stut
3112262Stut if (argc == 1) /* can't use stdin for seeking anyway */
3212262Stut {
3312262Stut puts("Usage: sortbib [-sKEYS] database [...]");
3412262Stut puts("\t-s: sort by fields in KEYS (default is AD)");
3512262Stut exit(1);
3612262Stut }
3712262Stut if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's')
3812262Stut {
3912262Stut keystr = argv[1]+2;
4012262Stut eval(keystr); /* evaluate A+ for multiple authors */
4112262Stut argv++; argc--;
4212262Stut }
4312262Stut if (argc > MXFILES+1) /* too many open file streams */
4412262Stut {
4512262Stut fprintf(stderr,
4612262Stut "sortbib: More than %d databases specified\n", MXFILES);
4712262Stut exit(1);
4812262Stut }
4912262Stut for (i = 1; i < argc; i++) /* open files in arg list */
5012262Stut if ((fp[i-1] = fopen(argv[i], "r")) == NULL)
5112262Stut error(argv[i]);
5247074Sdonn tempfile = mktemp(strdup(_PATH_TMPS)); /* tempfile for sorting keys */
5312262Stut if (signal(SIGINT,SIG_IGN) != SIG_IGN) /* remove if interrupted */
5412262Stut signal(SIGINT, onintr);
5512262Stut if ((tfp = fopen(tempfile, "w")) == NULL)
5612262Stut error(tempfile);
5712262Stut for (i = 0; i < argc-1; i++) /* read keys from bib files */
5812262Stut sortbib(fp[i], tfp, i);
5912262Stut fclose(tfp);
6012262Stut deliver(fp, tfp); /* do disk seeks and read from biblio files */
6112262Stut unlink(tempfile);
6212262Stut exit(0);
6312262Stut }
6412262Stut
/*
 * Record separator convention.  It starts at 0 (not yet known); sortbib()
 * fixes it to the first separator it meets and deliver() consults it when
 * writing the records back out.
 */
6512262Stut int rsmode = 0; /* record separator: 1 = null line, 2 = bracket */
6612262Stut
sortbib(fp,tfp,i)6712262Stut sortbib(fp, tfp, i) /* read records, prepare list for sorting */
6812262Stut FILE *fp, *tfp;
6912262Stut int i;
7012262Stut {
7112262Stut long offset, lastoffset = 0, ftell(); /* byte offsets in file */
7212262Stut int length, newrec, recno = 0; /* reclen, new rec'd?, number */
7312262Stut char line[BUF], fld[4][BUF]; /* one line, the sort fields */
7412262Stut
7512262Stut /* measure byte offset, then get new line */
7612262Stut while (offset = ftell(fp), fgets(line, BUF, fp))
7712262Stut {
7812262Stut if (recno == 0) /* accept record w/o initial newline */
7912262Stut newrec = 1;
8012262Stut if (line[0] == '\n') /* accept null line record separator */
8112262Stut {
8212262Stut if (!rsmode)
8312262Stut rsmode = 1; /* null line mode */
8412262Stut if (rsmode == 1)
8512262Stut newrec = 1;
8612262Stut }
8712262Stut if (line[0] == '.' && line[1] == '[') /* also accept .[ .] */
8812262Stut {
8912262Stut if (!rsmode)
9012262Stut rsmode = 2; /* bracket pair mode */
9112262Stut if (rsmode == 2)
9212262Stut newrec = 1;
9312262Stut }
9412262Stut if (newrec) /* by whatever means above */
9512262Stut {
9612262Stut newrec = 0;
9712262Stut length = offset - lastoffset; /* measure rec len */
9812262Stut if (length > BUF*8)
9912262Stut {
10012262Stut fprintf(stderr,
10112262Stut "sortbib: record %d longer than %d (%d)\n",
10212262Stut recno, BUF*8, length);
10312262Stut exit(1);
10412262Stut }
10512262Stut if (recno++) /* info for sorting */
10612262Stut {
10712262Stut fprintf(tfp, "%d %D %d : %s %s %s %s\n",
10812262Stut i, lastoffset, length,
10912262Stut fld[0], fld[1], fld[2], fld[3]);
11012262Stut if (ferror(tfp))
11112262Stut error(tempfile);
11212262Stut }
11312262Stut *fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL;
11412262Stut oneauth = 0; /* reset number of authors */
11512262Stut lastoffset = offset; /* save for next time */
11612262Stut }
11712262Stut if (line[0] == '%') /* parse out fields to be sorted */
11812262Stut parse(line, fld);
11912262Stut }
12012262Stut offset = ftell(fp); /* measure byte offset at EOF */
12112262Stut length = offset - lastoffset; /* measure final record length */
12212262Stut if (length > BUF*8)
12312262Stut {
12412262Stut fprintf(stderr, "sortbib: record %d longer than %d (%d)\n",
12512262Stut recno, BUF*8, length);
12612262Stut exit(1);
12712262Stut }
12812262Stut if (line[0] != '\n') /* ignore null line just before EOF */
12912262Stut {
13012262Stut fprintf(tfp, "%d %D %d : %s %s %s %s\n",
13112262Stut i, lastoffset, length,
13212262Stut fld[0], fld[1], fld[2], fld[3]);
13312262Stut if (ferror(tfp))
13412262Stut error(tempfile); /* disk error in /tmp */
13512262Stut }
13612262Stut }
13712262Stut
deliver(fp,tfp)13812262Stut deliver(fp, tfp) /* deliver sorted entries out of database(s) */
13912262Stut FILE *fp[], *tfp;
14012262Stut {
14112262Stut char str[BUF], buff[BUF*8]; /* for tempfile & databases */
14232508Sbostic char cmd[80]; /* for using system sort command */
14312262Stut long int offset;
14412262Stut int i, length;
14512262Stut
14612262Stut /* when sorting, ignore case distinctions; tab char is ':' */
14712262Stut sprintf(cmd, "sort -ft: +1 %s -o %s", tempfile, tempfile);
14812262Stut if (system(cmd) == 127)
14912262Stut error("sortbib");
15012262Stut tfp = fopen(tempfile, "r");
15112262Stut while (fgets(str, sizeof(str), tfp))
15212262Stut {
15312262Stut /* get file pointer, record offset, and length */
15412262Stut if (sscanf(str, "%d %D %d :", &i, &offset, &length) != 3)
15512262Stut error("sortbib: sorting error");
15612262Stut /* seek to proper disk location in proper file */
15712262Stut if (fseek(fp[i], offset, 0) == -1)
15812262Stut error("sortbib");
15912262Stut /* read exactly one record from bibliography */
16012262Stut if (fread(buff, sizeof(*buff), length, fp[i]) == 0)
16112262Stut error("sortbib");
16212262Stut /* add newline between unseparated records */
16312262Stut if (buff[0] != '\n' && rsmode == 1)
16412262Stut putchar('\n');
16512262Stut /* write record buffer to standard output */
16612262Stut if (fwrite(buff, sizeof(*buff), length, stdout) == 0)
16712262Stut error("sortbib");
16812262Stut }
16912262Stut }
17012262Stut
parse(line,fld)17112262Stut parse(line, fld) /* get fields out of line, prepare for sorting */
17212262Stut char line[];
17312262Stut char fld[][BUF];
17412262Stut {
17512262Stut char wd[8][BUF/4], *strcat();
17612262Stut int n, i, j;
17712262Stut
17812262Stut for (i = 0; i < 8; i++) /* zap out old strings */
17912262Stut *wd[i] = NULL;
18012262Stut n = sscanf(line, "%s %s %s %s %s %s %s %s",
18112262Stut wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]);
18212262Stut for (i = 0; i < 4; i++)
18312262Stut {
18412262Stut if (wd[0][1] == keystr[i])
18512262Stut {
18612262Stut if (wd[0][1] == 'A')
18712262Stut {
18812262Stut if (oneauth && !multauth) /* no repeat */
18912262Stut break;
19012262Stut else if (oneauth) /* mult auths */
19112262Stut strcat(fld[i], "~~");
19212262Stut if (!endcomma(wd[n-2])) /* surname */
19312262Stut strcat(fld[i], wd[n-1]);
19412262Stut else { /* jr. or ed. */
19512262Stut strcat(fld[i], wd[n-2]);
19612262Stut n--;
19712262Stut }
19812262Stut strcat(fld[i], " ");
19912262Stut for (j = 1; j < n-1; j++)
20012262Stut strcat(fld[i], wd[j]);
20112262Stut oneauth = 1;
20212262Stut }
20312262Stut else if (wd[0][1] == 'D')
20412262Stut {
20512262Stut strcat(fld[i], wd[n-1]); /* year */
20612262Stut if (n > 2)
20712262Stut strcat(fld[i], wd[1]); /* month */
20812262Stut }
20912262Stut else if (wd[0][1] == 'T' || wd[0][1] == 'J')
21012262Stut {
21112262Stut j = 1;
21212262Stut if (article(wd[1])) /* skip article */
21312262Stut j++;
21412262Stut for (; j < n; j++)
21512262Stut strcat(fld[i], wd[j]);
21612262Stut }
21712262Stut else /* any other field */
21812262Stut for (j = 1; j < n; j++)
21912262Stut strcat(fld[i], wd[j]);
22012262Stut }
22112262Stut /* %Q quorporate or queer author - unreversed %A */
22212262Stut else if (wd[0][1] == 'Q' && keystr[i] == 'A')
22312262Stut for (j = 1; j < n; j++)
22412262Stut strcat(fld[i], wd[j]);
22512262Stut }
22612262Stut }
22712262Stut
/*
 * article: tell whether str is, exactly and with this capitalisation,
 * one of the leading articles that titles are not sorted on.
 * Returns 1 if so, 0 otherwise.
 */
int
article(str)
char *str;
{
	static char *known[] = {
		"The", "A", "An",		/* English */
		"Le", "La",			/* French */
		"Der", "Die", "Das",		/* German */
		"El",				/* Spanish */
		"Den",				/* Scandinavian */
	};
	char **ap;

	for (ap = known; ap < known + sizeof(known) / sizeof(known[0]); ap++)
		if (strcmp(*ap, str) == 0)
			return(1);
	return(0);
}
25312262Stut
eval(keystr)25412262Stut eval(keystr) /* evaluate key string for A+ marking */
25512262Stut char keystr[];
25612262Stut {
25712262Stut int i, j;
25812262Stut
25912262Stut for (i = 0, j = 0; keystr[i]; i++, j++)
26012262Stut {
26112262Stut if (keystr[i] == '+')
26212262Stut {
26312262Stut multauth = 1;
26412262Stut i++;
26512262Stut }
26612262Stut keystr[j] = keystr[i];
26712262Stut }
26812262Stut keystr[j] = NULL;
26912262Stut }
27012262Stut
/*
 * error: exit in case of various system errors.
 * Hands s to perror(), which prints it ahead of the message for the
 * current errno, then ends the program with status 1.  Does not return.
 */
int
error(s)
char *s;
{
	perror(s);
	exit(1);
	/* NOTREACHED */
}
27712262Stut
27846849Sbostic void
onintr()27912262Stut onintr() /* remove tempfile in case of interrupt */
28012262Stut {
28112262Stut fprintf(stderr, "\nInterrupt\n");
28212262Stut unlink(tempfile);
28312262Stut exit(1);
28412262Stut }
28512262Stut
/*
 * endcomma: if str ends in a comma, cut the comma off and return 1;
 * otherwise leave str alone and return 0.  An empty string has no last
 * character and yields 0.
 */
int
endcomma(str)
char *str;
{
	size_t len;

	len = strlen(str);
	if (len > 0 && str[len-1] == ',')
	{
		str[len-1] = '\0';
		return(1);
	}
	return(0);
}
299