/* xref: /csrg-svn/old/refer/sortbib/sortbib.c (revision 48301) */
/*-
 * %sccs.include.proprietary.c%
 */
4*48301Sbostic 
/* SCCS identification string; compiled out when running lint. */
#ifndef lint
static char sccsid[] = "@(#)sortbib.c	4.6 (Berkeley) 04/18/91";
#endif /* not lint */
812262Stut 
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "pathnames.h"
1337895Sbostic 
#define BUF BUFSIZ		/* size of line and sort-field buffers */
#define MXFILES 16		/* maximum number of databases on the command line */

char *tempfile;			/* temporary file for sorting keys */
char *keystr = "AD";		/* default sorting on author and date */
int multauth = 0;		/* by default sort on senior author only */
int oneauth;			/* has there been author in the record? */
2112262Stut 
main(argc,argv)2212262Stut main(argc, argv)	/* sortbib: sort bibliographic database in place */
2312262Stut int argc;
2412262Stut char *argv[];
2512262Stut {
2612262Stut 	FILE *fp[MXFILES], *tfp, *fopen();
2746849Sbostic 	int i;
2846849Sbostic 	void onintr();
2912262Stut 	char *mktemp();
3012262Stut 
3112262Stut 	if (argc == 1)		/* can't use stdin for seeking anyway */
3212262Stut 	{
3312262Stut 		puts("Usage:  sortbib [-sKEYS] database [...]");
3412262Stut 		puts("\t-s: sort by fields in KEYS (default is AD)");
3512262Stut 		exit(1);
3612262Stut 	}
3712262Stut 	if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's')
3812262Stut 	{
3912262Stut 		keystr = argv[1]+2;
4012262Stut 		eval(keystr);		/* evaluate A+ for multiple authors */
4112262Stut 		argv++; argc--;
4212262Stut 	}
4312262Stut 	if (argc > MXFILES+1)	/* too many open file streams */
4412262Stut 	{
4512262Stut 		fprintf(stderr,
4612262Stut 		"sortbib: More than %d databases specified\n", MXFILES);
4712262Stut 		exit(1);
4812262Stut 	}
4912262Stut 	for (i = 1; i < argc; i++)		/* open files in arg list */
5012262Stut 		if ((fp[i-1] = fopen(argv[i], "r")) == NULL)
5112262Stut 			error(argv[i]);
5247074Sdonn 	tempfile = mktemp(strdup(_PATH_TMPS));	/* tempfile for sorting keys */
5312262Stut 	if (signal(SIGINT,SIG_IGN) != SIG_IGN)	/* remove if interrupted */
5412262Stut 		signal(SIGINT, onintr);
5512262Stut 	if ((tfp = fopen(tempfile, "w")) == NULL)
5612262Stut 		error(tempfile);
5712262Stut 	for (i = 0; i < argc-1; i++)		/* read keys from bib files */
5812262Stut 		sortbib(fp[i], tfp, i);
5912262Stut 	fclose(tfp);
6012262Stut 	deliver(fp, tfp);	/* do disk seeks and read from biblio files */
6112262Stut 	unlink(tempfile);
6212262Stut 	exit(0);
6312262Stut }
6412262Stut 
/*
 * Record separator convention, fixed by the first separator seen in the
 * input: 0 = not yet known, 1 = null line, 2 = bracket (.[ .]) pair.
 */
int rsmode = 0;
6612262Stut 
sortbib(fp,tfp,i)6712262Stut sortbib(fp, tfp, i)	/* read records, prepare list for sorting */
6812262Stut FILE *fp, *tfp;
6912262Stut int i;
7012262Stut {
7112262Stut 	long offset, lastoffset = 0, ftell();	/* byte offsets in file */
7212262Stut 	int length, newrec, recno = 0;		/* reclen, new rec'd?, number */
7312262Stut 	char line[BUF], fld[4][BUF];		/* one line, the sort fields */
7412262Stut 
7512262Stut 	/* measure byte offset, then get new line */
7612262Stut 	while (offset = ftell(fp), fgets(line, BUF, fp))
7712262Stut 	{
7812262Stut 		if (recno == 0)		/* accept record w/o initial newline */
7912262Stut 			newrec = 1;
8012262Stut 		if (line[0] == '\n')	/* accept null line record separator */
8112262Stut 		{
8212262Stut 			if (!rsmode)
8312262Stut 				rsmode = 1;	/* null line mode */
8412262Stut 			if (rsmode == 1)
8512262Stut 				newrec = 1;
8612262Stut 		}
8712262Stut 		if (line[0] == '.' && line[1] == '[')	/* also accept .[ .] */
8812262Stut 		{
8912262Stut 			if (!rsmode)
9012262Stut 				rsmode = 2;	/* bracket pair mode */
9112262Stut 			if (rsmode == 2)
9212262Stut 				newrec = 1;
9312262Stut 		}
9412262Stut 		if (newrec)		/* by whatever means above */
9512262Stut 		{
9612262Stut 			newrec = 0;
9712262Stut 			length = offset - lastoffset;	/* measure rec len */
9812262Stut 			if (length > BUF*8)
9912262Stut 			{
10012262Stut 				fprintf(stderr,
10112262Stut 				"sortbib: record %d longer than %d (%d)\n",
10212262Stut 					recno, BUF*8, length);
10312262Stut 				exit(1);
10412262Stut 			}
10512262Stut 			if (recno++)			/* info for sorting */
10612262Stut 			{
10712262Stut 				fprintf(tfp, "%d %D %d : %s %s %s %s\n",
10812262Stut 					i, lastoffset, length,
10912262Stut 					fld[0], fld[1], fld[2], fld[3]);
11012262Stut 				if (ferror(tfp))
11112262Stut 					error(tempfile);
11212262Stut 			}
11312262Stut 			*fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL;
11412262Stut 			oneauth = 0;		/* reset number of authors */
11512262Stut 			lastoffset = offset;	/* save for next time */
11612262Stut 		}
11712262Stut 		if (line[0] == '%')	/* parse out fields to be sorted */
11812262Stut 			parse(line, fld);
11912262Stut 	}
12012262Stut 	offset = ftell(fp);		/* measure byte offset at EOF */
12112262Stut 	length = offset - lastoffset;	/* measure final record length */
12212262Stut 	if (length > BUF*8)
12312262Stut 	{
12412262Stut 		fprintf(stderr, "sortbib: record %d longer than %d (%d)\n",
12512262Stut 			recno, BUF*8, length);
12612262Stut 		exit(1);
12712262Stut 	}
12812262Stut 	if (line[0] != '\n')		/* ignore null line just before EOF */
12912262Stut 	{
13012262Stut 		fprintf(tfp, "%d %D %d : %s %s %s %s\n",
13112262Stut 			i, lastoffset, length,
13212262Stut 			fld[0], fld[1], fld[2], fld[3]);
13312262Stut 		if (ferror(tfp))
13412262Stut 			error(tempfile);	/* disk error in /tmp */
13512262Stut 	}
13612262Stut }
13712262Stut 
deliver(fp,tfp)13812262Stut deliver(fp, tfp)	/* deliver sorted entries out of database(s) */
13912262Stut FILE *fp[], *tfp;
14012262Stut {
14112262Stut 	char str[BUF], buff[BUF*8];	/* for tempfile & databases */
14232508Sbostic 	char cmd[80];			/* for using system sort command */
14312262Stut 	long int offset;
14412262Stut 	int i, length;
14512262Stut 
14612262Stut 	/* when sorting, ignore case distinctions; tab char is ':' */
14712262Stut 	sprintf(cmd, "sort -ft: +1 %s -o %s", tempfile, tempfile);
14812262Stut 	if (system(cmd) == 127)
14912262Stut 		error("sortbib");
15012262Stut 	tfp = fopen(tempfile, "r");
15112262Stut 	while (fgets(str, sizeof(str), tfp))
15212262Stut 	{
15312262Stut 		/* get file pointer, record offset, and length */
15412262Stut 		if (sscanf(str, "%d %D %d :", &i, &offset, &length) != 3)
15512262Stut 			error("sortbib: sorting error");
15612262Stut 		/* seek to proper disk location in proper file */
15712262Stut 		if (fseek(fp[i], offset, 0) == -1)
15812262Stut 			error("sortbib");
15912262Stut 		/* read exactly one record from bibliography */
16012262Stut 		if (fread(buff, sizeof(*buff), length, fp[i]) == 0)
16112262Stut 			error("sortbib");
16212262Stut 		/* add newline between unseparated records */
16312262Stut 		if (buff[0] != '\n' && rsmode == 1)
16412262Stut 			putchar('\n');
16512262Stut 		/* write record buffer to standard output */
16612262Stut 		if (fwrite(buff, sizeof(*buff), length, stdout) == 0)
16712262Stut 			error("sortbib");
16812262Stut 	}
16912262Stut }
17012262Stut 
parse(line,fld)17112262Stut parse(line, fld)	/* get fields out of line, prepare for sorting */
17212262Stut char line[];
17312262Stut char fld[][BUF];
17412262Stut {
17512262Stut 	char wd[8][BUF/4], *strcat();
17612262Stut 	int n, i, j;
17712262Stut 
17812262Stut 	for (i = 0; i < 8; i++)		/* zap out old strings */
17912262Stut 		*wd[i] = NULL;
18012262Stut 	n = sscanf(line, "%s %s %s %s %s %s %s %s",
18112262Stut 		wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]);
18212262Stut 	for (i = 0; i < 4; i++)
18312262Stut 	{
18412262Stut 		if (wd[0][1] == keystr[i])
18512262Stut 		{
18612262Stut 			if (wd[0][1] == 'A')
18712262Stut 			{
18812262Stut 				if (oneauth && !multauth)	/* no repeat */
18912262Stut 					break;
19012262Stut 				else if (oneauth)		/* mult auths */
19112262Stut 					strcat(fld[i], "~~");
19212262Stut 				if (!endcomma(wd[n-2]))		/* surname */
19312262Stut 					strcat(fld[i], wd[n-1]);
19412262Stut 				else {				/* jr. or ed. */
19512262Stut 					strcat(fld[i], wd[n-2]);
19612262Stut 					n--;
19712262Stut 				}
19812262Stut 				strcat(fld[i], " ");
19912262Stut 				for (j = 1; j < n-1; j++)
20012262Stut 					strcat(fld[i], wd[j]);
20112262Stut 				oneauth = 1;
20212262Stut 			}
20312262Stut 			else if (wd[0][1] == 'D')
20412262Stut 			{
20512262Stut 				strcat(fld[i], wd[n-1]);	/* year */
20612262Stut 				if (n > 2)
20712262Stut 					strcat(fld[i], wd[1]);	/* month */
20812262Stut 			}
20912262Stut 			else if (wd[0][1] == 'T' || wd[0][1] == 'J')
21012262Stut 			{
21112262Stut 				j = 1;
21212262Stut 				if (article(wd[1]))	/* skip article */
21312262Stut 					j++;
21412262Stut 				for (; j < n; j++)
21512262Stut 					strcat(fld[i], wd[j]);
21612262Stut 			}
21712262Stut 			else  /* any other field */
21812262Stut 				for (j = 1; j < n; j++)
21912262Stut 					strcat(fld[i], wd[j]);
22012262Stut 		}
22112262Stut 		/* %Q quorporate or queer author - unreversed %A */
22212262Stut 		else if (wd[0][1] == 'Q' && keystr[i] == 'A')
22312262Stut 			for (j = 1; j < n; j++)
22412262Stut 				strcat(fld[i], wd[j]);
22512262Stut 	}
22612262Stut }
22712262Stut 
/*
 * article: see if string is an article
 *
 * Returns 1 when str is exactly (case-sensitively) one of the
 * capitalized articles listed below, 0 otherwise.
 */
int
article(char *str)
{
	static const char *const articles[] = {
		"The", "A", "An",	/* English */
		"Le", "La",		/* French */
		"Der", "Die", "Das",	/* German */
		"El",			/* Spanish */
		"Den",			/* Scandinavian */
	};
	size_t k;

	for (k = 0; k < sizeof(articles) / sizeof(articles[0]); k++)
		if (strcmp(articles[k], str) == 0)
			return(1);
	return(0);
}
25312262Stut 
eval(keystr)25412262Stut eval(keystr)		/* evaluate key string for A+ marking */
25512262Stut char keystr[];
25612262Stut {
25712262Stut 	int i, j;
25812262Stut 
25912262Stut 	for (i = 0, j = 0; keystr[i]; i++, j++)
26012262Stut 	{
26112262Stut 		if (keystr[i] == '+')
26212262Stut 		{
26312262Stut 			multauth = 1;
26412262Stut 			i++;
26512262Stut 		}
26612262Stut 		keystr[j] = keystr[i];
26712262Stut 	}
26812262Stut 	keystr[j] = NULL;
26912262Stut }
27012262Stut 
/*
 * error: exit in case of various system errors
 *
 * Prints s followed by the message for the current errno (perror) and
 * exits with status 1; it never returns.  Declared int only to stay
 * compatible with call sites that rely on the implicit declaration.
 */
int
error(char *s)
{
	perror(s);
	exit(1);
}
27712262Stut 
27846849Sbostic void
onintr()27912262Stut onintr()		/* remove tempfile in case of interrupt */
28012262Stut {
28112262Stut 	fprintf(stderr, "\nInterrupt\n");
28212262Stut 	unlink(tempfile);
28312262Stut 	exit(1);
28412262Stut }
28512262Stut 
/*
 * endcomma: strip a trailing comma
 *
 * If str ends in ',', the comma is removed in place and 1 is returned;
 * otherwise str is left alone and 0 is returned.  An empty string has
 * no last character and yields 0.
 */
int
endcomma(char *str)
{
	size_t len = strlen(str);

	if (len > 0 && str[len - 1] == ',')
	{
		str[len - 1] = '\0';
		return(1);
	}
	return(0);
}
299