xref: /csrg-svn/old/refer/sortbib/sortbib.c (revision 32508)
112262Stut #ifndef lint
2*32508Sbostic static char *sccsid = "@(#)sortbib.c	4.2 (Berkeley) 10/22/87";
312262Stut #endif
412262Stut 
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#define BUF BUFSIZ		/* size of the line and sort-field buffers */
#define MXFILES 16		/* most databases accepted in one run */

char *tempfile;			/* temporary file for sorting keys */
char *keystr = "AD";		/* default sorting on author and date */
int multauth = 0;		/* by default sort on senior author only */
int oneauth;			/* has there been author in the record? */

/*
 * catchint: SIGINT handler with the signature signal() requires.
 * It only hands control to onintr(), which does the actual cleanup.
 */
static void
catchint(int signo)
{
	int onintr();

	(void)signo;
	onintr();
}

/*
 * sortbib: sort bibliographic database(s) named on the command line.
 *
 * Usage:  sortbib [-sKEYS] database [...]
 *
 * Every database is opened for reading, one key line per record is
 * collected in a temporary file by sortbib(), and deliver() then sorts
 * that file and copies the records to standard output in key order.
 * Exits 0 on success and 1 on a usage error or any failure.
 */
int
main(int argc, char *argv[])
{
	/* writable template: mkstemp() replaces the X's in place */
	static char template[] = "/tmp/SbibXXXXXX";
	FILE *fp[MXFILES], *tfp, *fdopen();
	int i, tfd, mkstemp();
	int eval(), error(), sortbib(), deliver();

	if (argc == 1)		/* can't use stdin for seeking anyway */
	{
		puts("Usage:  sortbib [-sKEYS] database [...]");
		puts("\t-s: sort by fields in KEYS (default is AD)");
		exit(1);
	}
	if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's')
	{
		keystr = argv[1] + 2;
		eval(keystr);		/* evaluate A+ for multiple authors */
		argv++;
		argc--;
	}
	if (argc > MXFILES + 1)	/* too many open file streams */
	{
		fprintf(stderr,
		"sortbib: More than %d databases specified\n", MXFILES);
		exit(1);
	}
	for (i = 1; i < argc; i++)		/* open files in arg list */
		if ((fp[i-1] = fopen(argv[i], "r")) == NULL)
			error(argv[i]);

	/*
	 * Create the key file atomically.  mkstemp() both picks the name
	 * and opens the file, so nobody can slip a file or symlink in
	 * between the two steps as was possible with mktemp() + fopen().
	 */
	tempfile = template;
	if ((tfd = mkstemp(tempfile)) == -1)
		error(tempfile);
	if (signal(SIGINT, SIG_IGN) != SIG_IGN)	/* remove if interrupted */
		signal(SIGINT, catchint);
	if ((tfp = fdopen(tfd, "w")) == NULL)
	{
		perror(tempfile);	/* report before unlink can touch errno */
		unlink(tempfile);
		exit(1);
	}
	for (i = 0; i < argc - 1; i++)		/* read keys from bib files */
		sortbib(fp[i], tfp, i);
	if (fclose(tfp) == EOF)			/* flush failure loses keys */
	{
		perror(tempfile);
		unlink(tempfile);
		exit(1);
	}
	/* deliver() reopens the key file itself; the old stream is gone */
	deliver(fp, (FILE *)NULL);
	unlink(tempfile);
	exit(0);
}
5712262Stut 
5812262Stut int rsmode = 0;		/* record separator: 1 = null line, 2 = bracket */
5912262Stut 
6012262Stut sortbib(fp, tfp, i)	/* read records, prepare list for sorting */
6112262Stut FILE *fp, *tfp;
6212262Stut int i;
6312262Stut {
6412262Stut 	long offset, lastoffset = 0, ftell();	/* byte offsets in file */
6512262Stut 	int length, newrec, recno = 0;		/* reclen, new rec'd?, number */
6612262Stut 	char line[BUF], fld[4][BUF];		/* one line, the sort fields */
6712262Stut 
6812262Stut 	/* measure byte offset, then get new line */
6912262Stut 	while (offset = ftell(fp), fgets(line, BUF, fp))
7012262Stut 	{
7112262Stut 		if (recno == 0)		/* accept record w/o initial newline */
7212262Stut 			newrec = 1;
7312262Stut 		if (line[0] == '\n')	/* accept null line record separator */
7412262Stut 		{
7512262Stut 			if (!rsmode)
7612262Stut 				rsmode = 1;	/* null line mode */
7712262Stut 			if (rsmode == 1)
7812262Stut 				newrec = 1;
7912262Stut 		}
8012262Stut 		if (line[0] == '.' && line[1] == '[')	/* also accept .[ .] */
8112262Stut 		{
8212262Stut 			if (!rsmode)
8312262Stut 				rsmode = 2;	/* bracket pair mode */
8412262Stut 			if (rsmode == 2)
8512262Stut 				newrec = 1;
8612262Stut 		}
8712262Stut 		if (newrec)		/* by whatever means above */
8812262Stut 		{
8912262Stut 			newrec = 0;
9012262Stut 			length = offset - lastoffset;	/* measure rec len */
9112262Stut 			if (length > BUF*8)
9212262Stut 			{
9312262Stut 				fprintf(stderr,
9412262Stut 				"sortbib: record %d longer than %d (%d)\n",
9512262Stut 					recno, BUF*8, length);
9612262Stut 				exit(1);
9712262Stut 			}
9812262Stut 			if (recno++)			/* info for sorting */
9912262Stut 			{
10012262Stut 				fprintf(tfp, "%d %D %d : %s %s %s %s\n",
10112262Stut 					i, lastoffset, length,
10212262Stut 					fld[0], fld[1], fld[2], fld[3]);
10312262Stut 				if (ferror(tfp))
10412262Stut 					error(tempfile);
10512262Stut 			}
10612262Stut 			*fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL;
10712262Stut 			oneauth = 0;		/* reset number of authors */
10812262Stut 			lastoffset = offset;	/* save for next time */
10912262Stut 		}
11012262Stut 		if (line[0] == '%')	/* parse out fields to be sorted */
11112262Stut 			parse(line, fld);
11212262Stut 	}
11312262Stut 	offset = ftell(fp);		/* measure byte offset at EOF */
11412262Stut 	length = offset - lastoffset;	/* measure final record length */
11512262Stut 	if (length > BUF*8)
11612262Stut 	{
11712262Stut 		fprintf(stderr, "sortbib: record %d longer than %d (%d)\n",
11812262Stut 			recno, BUF*8, length);
11912262Stut 		exit(1);
12012262Stut 	}
12112262Stut 	if (line[0] != '\n')		/* ignore null line just before EOF */
12212262Stut 	{
12312262Stut 		fprintf(tfp, "%d %D %d : %s %s %s %s\n",
12412262Stut 			i, lastoffset, length,
12512262Stut 			fld[0], fld[1], fld[2], fld[3]);
12612262Stut 		if (ferror(tfp))
12712262Stut 			error(tempfile);	/* disk error in /tmp */
12812262Stut 	}
12912262Stut }
13012262Stut 
13112262Stut deliver(fp, tfp)	/* deliver sorted entries out of database(s) */
13212262Stut FILE *fp[], *tfp;
13312262Stut {
13412262Stut 	char str[BUF], buff[BUF*8];	/* for tempfile & databases */
135*32508Sbostic 	char cmd[80];			/* for using system sort command */
13612262Stut 	long int offset;
13712262Stut 	int i, length;
13812262Stut 
13912262Stut 	/* when sorting, ignore case distinctions; tab char is ':' */
14012262Stut 	sprintf(cmd, "sort -ft: +1 %s -o %s", tempfile, tempfile);
14112262Stut 	if (system(cmd) == 127)
14212262Stut 		error("sortbib");
14312262Stut 	tfp = fopen(tempfile, "r");
14412262Stut 	while (fgets(str, sizeof(str), tfp))
14512262Stut 	{
14612262Stut 		/* get file pointer, record offset, and length */
14712262Stut 		if (sscanf(str, "%d %D %d :", &i, &offset, &length) != 3)
14812262Stut 			error("sortbib: sorting error");
14912262Stut 		/* seek to proper disk location in proper file */
15012262Stut 		if (fseek(fp[i], offset, 0) == -1)
15112262Stut 			error("sortbib");
15212262Stut 		/* read exactly one record from bibliography */
15312262Stut 		if (fread(buff, sizeof(*buff), length, fp[i]) == 0)
15412262Stut 			error("sortbib");
15512262Stut 		/* add newline between unseparated records */
15612262Stut 		if (buff[0] != '\n' && rsmode == 1)
15712262Stut 			putchar('\n');
15812262Stut 		/* write record buffer to standard output */
15912262Stut 		if (fwrite(buff, sizeof(*buff), length, stdout) == 0)
16012262Stut 			error("sortbib");
16112262Stut 	}
16212262Stut }
16312262Stut 
16412262Stut parse(line, fld)	/* get fields out of line, prepare for sorting */
16512262Stut char line[];
16612262Stut char fld[][BUF];
16712262Stut {
16812262Stut 	char wd[8][BUF/4], *strcat();
16912262Stut 	int n, i, j;
17012262Stut 
17112262Stut 	for (i = 0; i < 8; i++)		/* zap out old strings */
17212262Stut 		*wd[i] = NULL;
17312262Stut 	n = sscanf(line, "%s %s %s %s %s %s %s %s",
17412262Stut 		wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]);
17512262Stut 	for (i = 0; i < 4; i++)
17612262Stut 	{
17712262Stut 		if (wd[0][1] == keystr[i])
17812262Stut 		{
17912262Stut 			if (wd[0][1] == 'A')
18012262Stut 			{
18112262Stut 				if (oneauth && !multauth)	/* no repeat */
18212262Stut 					break;
18312262Stut 				else if (oneauth)		/* mult auths */
18412262Stut 					strcat(fld[i], "~~");
18512262Stut 				if (!endcomma(wd[n-2]))		/* surname */
18612262Stut 					strcat(fld[i], wd[n-1]);
18712262Stut 				else {				/* jr. or ed. */
18812262Stut 					strcat(fld[i], wd[n-2]);
18912262Stut 					n--;
19012262Stut 				}
19112262Stut 				strcat(fld[i], " ");
19212262Stut 				for (j = 1; j < n-1; j++)
19312262Stut 					strcat(fld[i], wd[j]);
19412262Stut 				oneauth = 1;
19512262Stut 			}
19612262Stut 			else if (wd[0][1] == 'D')
19712262Stut 			{
19812262Stut 				strcat(fld[i], wd[n-1]);	/* year */
19912262Stut 				if (n > 2)
20012262Stut 					strcat(fld[i], wd[1]);	/* month */
20112262Stut 			}
20212262Stut 			else if (wd[0][1] == 'T' || wd[0][1] == 'J')
20312262Stut 			{
20412262Stut 				j = 1;
20512262Stut 				if (article(wd[1]))	/* skip article */
20612262Stut 					j++;
20712262Stut 				for (; j < n; j++)
20812262Stut 					strcat(fld[i], wd[j]);
20912262Stut 			}
21012262Stut 			else  /* any other field */
21112262Stut 				for (j = 1; j < n; j++)
21212262Stut 					strcat(fld[i], wd[j]);
21312262Stut 		}
21412262Stut 		/* %Q quorporate or queer author - unreversed %A */
21512262Stut 		else if (wd[0][1] == 'Q' && keystr[i] == 'A')
21612262Stut 			for (j = 1; j < n; j++)
21712262Stut 				strcat(fld[i], wd[j]);
21812262Stut 	}
21912262Stut }
22012262Stut 
/*
 * article: see if string is an article.
 * Returns 1 if str is exactly one of the capitalized articles listed
 * below, 0 otherwise.  The comparison is case sensitive.
 */
int
article(const char *str)
{
	static const char *const articles[] = {
		"The", "A", "An",	/* English */
		"Le", "La",		/* French */
		"Der", "Die", "Das",	/* German */
		"El",			/* Spanish */
		"Den"			/* Scandinavian */
	};
	size_t k;

	for (k = 0; k < sizeof(articles) / sizeof(articles[0]); k++)
		if (strcmp(articles[k], str) == 0)
			return(1);
	return(0);
}
24612262Stut 
24712262Stut eval(keystr)		/* evaluate key string for A+ marking */
24812262Stut char keystr[];
24912262Stut {
25012262Stut 	int i, j;
25112262Stut 
25212262Stut 	for (i = 0, j = 0; keystr[i]; i++, j++)
25312262Stut 	{
25412262Stut 		if (keystr[i] == '+')
25512262Stut 		{
25612262Stut 			multauth = 1;
25712262Stut 			i++;
25812262Stut 		}
25912262Stut 		keystr[j] = keystr[i];
26012262Stut 	}
26112262Stut 	keystr[j] = NULL;
26212262Stut }
26312262Stut 
26412262Stut error(s)		/* exit in case of various system errors */
26512262Stut char *s;
26612262Stut {
26712262Stut 	perror(s);
26812262Stut 	exit(1);
26912262Stut }
27012262Stut 
27112262Stut onintr()		/* remove tempfile in case of interrupt */
27212262Stut {
27312262Stut 	fprintf(stderr, "\nInterrupt\n");
27412262Stut 	unlink(tempfile);
27512262Stut 	exit(1);
27612262Stut }
27712262Stut 
/*
 * endcomma: test whether str ends in a comma.
 * If it does, the comma is removed from str and 1 is returned.
 * Otherwise, including for an empty string, str is left alone and 0 is
 * returned.
 */
int
endcomma(char *str)
{
	size_t len = strlen(str);

	if (len == 0 || str[len-1] != ',')
		return(0);
	str[len-1] = '\0';
	return(1);
}
291