xref: /csrg-svn/old/refer/sortbib/sortbib.c (revision 37895)
/* SCCS identification string for this file; omitted when lint is defined. */
112262Stut #ifndef lint
2*37895Sbostic static char *sccsid = "@(#)sortbib.c	4.3 (Berkeley) 05/11/89";
312262Stut #endif
412262Stut 
512262Stut #include <stdio.h>
612262Stut #include <signal.h>
7*37895Sbostic #include "pathnames.h"
8*37895Sbostic 
/*
 * BUF is the size of the line and sort-field buffers; a single record may
 * be at most BUF*8 bytes long.  MXFILES is the largest number of databases
 * accepted on the command line.
 */
912262Stut #define BUF BUFSIZ
1012262Stut #define MXFILES 16
1112262Stut 
/*
 * Program-wide state.  tempfile is assigned in main() and unlinked by
 * main() and onintr().  keystr is pointed at the -s argument by main() and
 * edited in place by eval(), which also sets multauth.  oneauth is cleared
 * by sortbib() at the start of each record and set by parse().
 */
1212262Stut char *tempfile;			/* temporary file for sorting keys */
1312262Stut char *keystr = "AD";		/* default sorting on author and date */
1412262Stut int multauth = 0;		/* by default sort on senior author only */
1512262Stut int oneauth;			/* has there been author in the record? */
1612262Stut 
1712262Stut main(argc, argv)	/* sortbib: sort bibliographic database in place */
1812262Stut int argc;
1912262Stut char *argv[];
2012262Stut {
2112262Stut 	FILE *fp[MXFILES], *tfp, *fopen();
2212262Stut 	int i, onintr();
2312262Stut 	char *mktemp();
2412262Stut 
2512262Stut 	if (argc == 1)		/* can't use stdin for seeking anyway */
2612262Stut 	{
2712262Stut 		puts("Usage:  sortbib [-sKEYS] database [...]");
2812262Stut 		puts("\t-s: sort by fields in KEYS (default is AD)");
2912262Stut 		exit(1);
3012262Stut 	}
3112262Stut 	if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's')
3212262Stut 	{
3312262Stut 		keystr = argv[1]+2;
3412262Stut 		eval(keystr);		/* evaluate A+ for multiple authors */
3512262Stut 		argv++; argc--;
3612262Stut 	}
3712262Stut 	if (argc > MXFILES+1)	/* too many open file streams */
3812262Stut 	{
3912262Stut 		fprintf(stderr,
4012262Stut 		"sortbib: More than %d databases specified\n", MXFILES);
4112262Stut 		exit(1);
4212262Stut 	}
4312262Stut 	for (i = 1; i < argc; i++)		/* open files in arg list */
4412262Stut 		if ((fp[i-1] = fopen(argv[i], "r")) == NULL)
4512262Stut 			error(argv[i]);
46*37895Sbostic 	tempfile = _PATH_TMPS;			/* tempfile for sorting keys */
4712262Stut 	mktemp(tempfile);
4812262Stut 	if (signal(SIGINT,SIG_IGN) != SIG_IGN)	/* remove if interrupted */
4912262Stut 		signal(SIGINT, onintr);
5012262Stut 	if ((tfp = fopen(tempfile, "w")) == NULL)
5112262Stut 		error(tempfile);
5212262Stut 	for (i = 0; i < argc-1; i++)		/* read keys from bib files */
5312262Stut 		sortbib(fp[i], tfp, i);
5412262Stut 	fclose(tfp);
5512262Stut 	deliver(fp, tfp);	/* do disk seeks and read from biblio files */
5612262Stut 	unlink(tempfile);
5712262Stut 	exit(0);
5812262Stut }
5912262Stut 
/*
 * Starts at 0 (no separator seen yet); sortbib() latches it to the first
 * separator style it meets, and deliver() consults it when writing records.
 */
6012262Stut int rsmode = 0;		/* record separator: 1 = null line, 2 = bracket */
6112262Stut 
6212262Stut sortbib(fp, tfp, i)	/* read records, prepare list for sorting */
6312262Stut FILE *fp, *tfp;
6412262Stut int i;
6512262Stut {
6612262Stut 	long offset, lastoffset = 0, ftell();	/* byte offsets in file */
6712262Stut 	int length, newrec, recno = 0;		/* reclen, new rec'd?, number */
6812262Stut 	char line[BUF], fld[4][BUF];		/* one line, the sort fields */
6912262Stut 
7012262Stut 	/* measure byte offset, then get new line */
7112262Stut 	while (offset = ftell(fp), fgets(line, BUF, fp))
7212262Stut 	{
7312262Stut 		if (recno == 0)		/* accept record w/o initial newline */
7412262Stut 			newrec = 1;
7512262Stut 		if (line[0] == '\n')	/* accept null line record separator */
7612262Stut 		{
7712262Stut 			if (!rsmode)
7812262Stut 				rsmode = 1;	/* null line mode */
7912262Stut 			if (rsmode == 1)
8012262Stut 				newrec = 1;
8112262Stut 		}
8212262Stut 		if (line[0] == '.' && line[1] == '[')	/* also accept .[ .] */
8312262Stut 		{
8412262Stut 			if (!rsmode)
8512262Stut 				rsmode = 2;	/* bracket pair mode */
8612262Stut 			if (rsmode == 2)
8712262Stut 				newrec = 1;
8812262Stut 		}
8912262Stut 		if (newrec)		/* by whatever means above */
9012262Stut 		{
9112262Stut 			newrec = 0;
9212262Stut 			length = offset - lastoffset;	/* measure rec len */
9312262Stut 			if (length > BUF*8)
9412262Stut 			{
9512262Stut 				fprintf(stderr,
9612262Stut 				"sortbib: record %d longer than %d (%d)\n",
9712262Stut 					recno, BUF*8, length);
9812262Stut 				exit(1);
9912262Stut 			}
10012262Stut 			if (recno++)			/* info for sorting */
10112262Stut 			{
10212262Stut 				fprintf(tfp, "%d %D %d : %s %s %s %s\n",
10312262Stut 					i, lastoffset, length,
10412262Stut 					fld[0], fld[1], fld[2], fld[3]);
10512262Stut 				if (ferror(tfp))
10612262Stut 					error(tempfile);
10712262Stut 			}
10812262Stut 			*fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL;
10912262Stut 			oneauth = 0;		/* reset number of authors */
11012262Stut 			lastoffset = offset;	/* save for next time */
11112262Stut 		}
11212262Stut 		if (line[0] == '%')	/* parse out fields to be sorted */
11312262Stut 			parse(line, fld);
11412262Stut 	}
11512262Stut 	offset = ftell(fp);		/* measure byte offset at EOF */
11612262Stut 	length = offset - lastoffset;	/* measure final record length */
11712262Stut 	if (length > BUF*8)
11812262Stut 	{
11912262Stut 		fprintf(stderr, "sortbib: record %d longer than %d (%d)\n",
12012262Stut 			recno, BUF*8, length);
12112262Stut 		exit(1);
12212262Stut 	}
12312262Stut 	if (line[0] != '\n')		/* ignore null line just before EOF */
12412262Stut 	{
12512262Stut 		fprintf(tfp, "%d %D %d : %s %s %s %s\n",
12612262Stut 			i, lastoffset, length,
12712262Stut 			fld[0], fld[1], fld[2], fld[3]);
12812262Stut 		if (ferror(tfp))
12912262Stut 			error(tempfile);	/* disk error in /tmp */
13012262Stut 	}
13112262Stut }
13212262Stut 
13312262Stut deliver(fp, tfp)	/* deliver sorted entries out of database(s) */
13412262Stut FILE *fp[], *tfp;
13512262Stut {
13612262Stut 	char str[BUF], buff[BUF*8];	/* for tempfile & databases */
13732508Sbostic 	char cmd[80];			/* for using system sort command */
13812262Stut 	long int offset;
13912262Stut 	int i, length;
14012262Stut 
14112262Stut 	/* when sorting, ignore case distinctions; tab char is ':' */
14212262Stut 	sprintf(cmd, "sort -ft: +1 %s -o %s", tempfile, tempfile);
14312262Stut 	if (system(cmd) == 127)
14412262Stut 		error("sortbib");
14512262Stut 	tfp = fopen(tempfile, "r");
14612262Stut 	while (fgets(str, sizeof(str), tfp))
14712262Stut 	{
14812262Stut 		/* get file pointer, record offset, and length */
14912262Stut 		if (sscanf(str, "%d %D %d :", &i, &offset, &length) != 3)
15012262Stut 			error("sortbib: sorting error");
15112262Stut 		/* seek to proper disk location in proper file */
15212262Stut 		if (fseek(fp[i], offset, 0) == -1)
15312262Stut 			error("sortbib");
15412262Stut 		/* read exactly one record from bibliography */
15512262Stut 		if (fread(buff, sizeof(*buff), length, fp[i]) == 0)
15612262Stut 			error("sortbib");
15712262Stut 		/* add newline between unseparated records */
15812262Stut 		if (buff[0] != '\n' && rsmode == 1)
15912262Stut 			putchar('\n');
16012262Stut 		/* write record buffer to standard output */
16112262Stut 		if (fwrite(buff, sizeof(*buff), length, stdout) == 0)
16212262Stut 			error("sortbib");
16312262Stut 	}
16412262Stut }
16512262Stut 
16612262Stut parse(line, fld)	/* get fields out of line, prepare for sorting */
16712262Stut char line[];
16812262Stut char fld[][BUF];
16912262Stut {
17012262Stut 	char wd[8][BUF/4], *strcat();
17112262Stut 	int n, i, j;
17212262Stut 
17312262Stut 	for (i = 0; i < 8; i++)		/* zap out old strings */
17412262Stut 		*wd[i] = NULL;
17512262Stut 	n = sscanf(line, "%s %s %s %s %s %s %s %s",
17612262Stut 		wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]);
17712262Stut 	for (i = 0; i < 4; i++)
17812262Stut 	{
17912262Stut 		if (wd[0][1] == keystr[i])
18012262Stut 		{
18112262Stut 			if (wd[0][1] == 'A')
18212262Stut 			{
18312262Stut 				if (oneauth && !multauth)	/* no repeat */
18412262Stut 					break;
18512262Stut 				else if (oneauth)		/* mult auths */
18612262Stut 					strcat(fld[i], "~~");
18712262Stut 				if (!endcomma(wd[n-2]))		/* surname */
18812262Stut 					strcat(fld[i], wd[n-1]);
18912262Stut 				else {				/* jr. or ed. */
19012262Stut 					strcat(fld[i], wd[n-2]);
19112262Stut 					n--;
19212262Stut 				}
19312262Stut 				strcat(fld[i], " ");
19412262Stut 				for (j = 1; j < n-1; j++)
19512262Stut 					strcat(fld[i], wd[j]);
19612262Stut 				oneauth = 1;
19712262Stut 			}
19812262Stut 			else if (wd[0][1] == 'D')
19912262Stut 			{
20012262Stut 				strcat(fld[i], wd[n-1]);	/* year */
20112262Stut 				if (n > 2)
20212262Stut 					strcat(fld[i], wd[1]);	/* month */
20312262Stut 			}
20412262Stut 			else if (wd[0][1] == 'T' || wd[0][1] == 'J')
20512262Stut 			{
20612262Stut 				j = 1;
20712262Stut 				if (article(wd[1]))	/* skip article */
20812262Stut 					j++;
20912262Stut 				for (; j < n; j++)
21012262Stut 					strcat(fld[i], wd[j]);
21112262Stut 			}
21212262Stut 			else  /* any other field */
21312262Stut 				for (j = 1; j < n; j++)
21412262Stut 					strcat(fld[i], wd[j]);
21512262Stut 		}
21612262Stut 		/* %Q quorporate or queer author - unreversed %A */
21712262Stut 		else if (wd[0][1] == 'Q' && keystr[i] == 'A')
21812262Stut 			for (j = 1; j < n; j++)
21912262Stut 				strcat(fld[i], wd[j]);
22012262Stut 	}
22112262Stut }
22212262Stut 
/*
 * article: see if a word is one of the articles skipped when sorting.
 * The match is exact and case-sensitive.  Returns 1 on a match, else 0.
 */
int
article(str)
char *str;
{
	static char *known[] = {
		"The", "A", "An",	/* English */
		"Le", "La",		/* French */
		"Der", "Die", "Das",	/* German */
		"El",			/* Spanish */
		"Den",			/* Scandinavian */
		0
	};
	char **p;

	for (p = known; *p != 0; p++)
		if (strcmp(*p, str) == 0)
			return(1);
	return(0);
}
24812262Stut 
24912262Stut eval(keystr)		/* evaluate key string for A+ marking */
25012262Stut char keystr[];
25112262Stut {
25212262Stut 	int i, j;
25312262Stut 
25412262Stut 	for (i = 0, j = 0; keystr[i]; i++, j++)
25512262Stut 	{
25612262Stut 		if (keystr[i] == '+')
25712262Stut 		{
25812262Stut 			multauth = 1;
25912262Stut 			i++;
26012262Stut 		}
26112262Stut 		keystr[j] = keystr[i];
26212262Stut 	}
26312262Stut 	keystr[j] = NULL;
26412262Stut }
26512262Stut 
/*
 * error: report a failed system operation and give up.
 * s is handed to perror() as the message prefix; the program then exits
 * with status 1, so this function does not return.
 */
int
error(s)
char *s;
{
	perror(s);
	exit(1);
}
27212262Stut 
27312262Stut onintr()		/* remove tempfile in case of interrupt */
27412262Stut {
27512262Stut 	fprintf(stderr, "\nInterrupt\n");
27612262Stut 	unlink(tempfile);
27712262Stut 	exit(1);
27812262Stut }
27912262Stut 
/*
 * endcomma: see if a word ends in a comma, and if so strip it.
 *
 * str is modified in place when the comma is removed.
 * Returns 1 if a trailing comma was found and removed, 0 otherwise
 * (including for an empty string, which is left untouched).
 */
int
endcomma(str)
char *str;
{
	int n;

	n = strlen(str);
	if (n == 0)		/* no last character to look at */
		return(0);
	if (str[n-1] == ',')
	{
		str[n-1] = '\0';
		return(1);
	}
	return(0);
}
293