xref: /csrg-svn/old/refer/sortbib/sortbib.c (revision 47074)
112262Stut #ifndef lint
2*47074Sdonn static char *sccsid = "@(#)sortbib.c	4.5 (Berkeley) 03/07/91";
312262Stut #endif
412262Stut 
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "pathnames.h"
937895Sbostic 
#define BUF	BUFSIZ		/* size of one input line and of one sort field */
#define MXFILES	16		/* size of main()'s table of database streams */

/* Name of the temporary file that holds the sorting keys. */
char *tempfile;

/* Field letters to sort on; the default is author, then date. */
char *keystr = "AD";

/* Nonzero to sort on every author; by default only the senior author counts. */
int multauth = 0;

/* Has an author already been seen in the current record? */
int oneauth;
1712262Stut 
1812262Stut main(argc, argv)	/* sortbib: sort bibliographic database in place */
1912262Stut int argc;
2012262Stut char *argv[];
2112262Stut {
2212262Stut 	FILE *fp[MXFILES], *tfp, *fopen();
2346849Sbostic 	int i;
2446849Sbostic 	void onintr();
2512262Stut 	char *mktemp();
2612262Stut 
2712262Stut 	if (argc == 1)		/* can't use stdin for seeking anyway */
2812262Stut 	{
2912262Stut 		puts("Usage:  sortbib [-sKEYS] database [...]");
3012262Stut 		puts("\t-s: sort by fields in KEYS (default is AD)");
3112262Stut 		exit(1);
3212262Stut 	}
3312262Stut 	if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's')
3412262Stut 	{
3512262Stut 		keystr = argv[1]+2;
3612262Stut 		eval(keystr);		/* evaluate A+ for multiple authors */
3712262Stut 		argv++; argc--;
3812262Stut 	}
3912262Stut 	if (argc > MXFILES+1)	/* too many open file streams */
4012262Stut 	{
4112262Stut 		fprintf(stderr,
4212262Stut 		"sortbib: More than %d databases specified\n", MXFILES);
4312262Stut 		exit(1);
4412262Stut 	}
4512262Stut 	for (i = 1; i < argc; i++)		/* open files in arg list */
4612262Stut 		if ((fp[i-1] = fopen(argv[i], "r")) == NULL)
4712262Stut 			error(argv[i]);
48*47074Sdonn 	tempfile = mktemp(strdup(_PATH_TMPS));	/* tempfile for sorting keys */
4912262Stut 	if (signal(SIGINT,SIG_IGN) != SIG_IGN)	/* remove if interrupted */
5012262Stut 		signal(SIGINT, onintr);
5112262Stut 	if ((tfp = fopen(tempfile, "w")) == NULL)
5212262Stut 		error(tempfile);
5312262Stut 	for (i = 0; i < argc-1; i++)		/* read keys from bib files */
5412262Stut 		sortbib(fp[i], tfp, i);
5512262Stut 	fclose(tfp);
5612262Stut 	deliver(fp, tfp);	/* do disk seeks and read from biblio files */
5712262Stut 	unlink(tempfile);
5812262Stut 	exit(0);
5912262Stut }
6012262Stut 
6112262Stut int rsmode = 0;		/* record separator: 1 = null line, 2 = bracket */
6212262Stut 
6312262Stut sortbib(fp, tfp, i)	/* read records, prepare list for sorting */
6412262Stut FILE *fp, *tfp;
6512262Stut int i;
6612262Stut {
6712262Stut 	long offset, lastoffset = 0, ftell();	/* byte offsets in file */
6812262Stut 	int length, newrec, recno = 0;		/* reclen, new rec'd?, number */
6912262Stut 	char line[BUF], fld[4][BUF];		/* one line, the sort fields */
7012262Stut 
7112262Stut 	/* measure byte offset, then get new line */
7212262Stut 	while (offset = ftell(fp), fgets(line, BUF, fp))
7312262Stut 	{
7412262Stut 		if (recno == 0)		/* accept record w/o initial newline */
7512262Stut 			newrec = 1;
7612262Stut 		if (line[0] == '\n')	/* accept null line record separator */
7712262Stut 		{
7812262Stut 			if (!rsmode)
7912262Stut 				rsmode = 1;	/* null line mode */
8012262Stut 			if (rsmode == 1)
8112262Stut 				newrec = 1;
8212262Stut 		}
8312262Stut 		if (line[0] == '.' && line[1] == '[')	/* also accept .[ .] */
8412262Stut 		{
8512262Stut 			if (!rsmode)
8612262Stut 				rsmode = 2;	/* bracket pair mode */
8712262Stut 			if (rsmode == 2)
8812262Stut 				newrec = 1;
8912262Stut 		}
9012262Stut 		if (newrec)		/* by whatever means above */
9112262Stut 		{
9212262Stut 			newrec = 0;
9312262Stut 			length = offset - lastoffset;	/* measure rec len */
9412262Stut 			if (length > BUF*8)
9512262Stut 			{
9612262Stut 				fprintf(stderr,
9712262Stut 				"sortbib: record %d longer than %d (%d)\n",
9812262Stut 					recno, BUF*8, length);
9912262Stut 				exit(1);
10012262Stut 			}
10112262Stut 			if (recno++)			/* info for sorting */
10212262Stut 			{
10312262Stut 				fprintf(tfp, "%d %D %d : %s %s %s %s\n",
10412262Stut 					i, lastoffset, length,
10512262Stut 					fld[0], fld[1], fld[2], fld[3]);
10612262Stut 				if (ferror(tfp))
10712262Stut 					error(tempfile);
10812262Stut 			}
10912262Stut 			*fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL;
11012262Stut 			oneauth = 0;		/* reset number of authors */
11112262Stut 			lastoffset = offset;	/* save for next time */
11212262Stut 		}
11312262Stut 		if (line[0] == '%')	/* parse out fields to be sorted */
11412262Stut 			parse(line, fld);
11512262Stut 	}
11612262Stut 	offset = ftell(fp);		/* measure byte offset at EOF */
11712262Stut 	length = offset - lastoffset;	/* measure final record length */
11812262Stut 	if (length > BUF*8)
11912262Stut 	{
12012262Stut 		fprintf(stderr, "sortbib: record %d longer than %d (%d)\n",
12112262Stut 			recno, BUF*8, length);
12212262Stut 		exit(1);
12312262Stut 	}
12412262Stut 	if (line[0] != '\n')		/* ignore null line just before EOF */
12512262Stut 	{
12612262Stut 		fprintf(tfp, "%d %D %d : %s %s %s %s\n",
12712262Stut 			i, lastoffset, length,
12812262Stut 			fld[0], fld[1], fld[2], fld[3]);
12912262Stut 		if (ferror(tfp))
13012262Stut 			error(tempfile);	/* disk error in /tmp */
13112262Stut 	}
13212262Stut }
13312262Stut 
13412262Stut deliver(fp, tfp)	/* deliver sorted entries out of database(s) */
13512262Stut FILE *fp[], *tfp;
13612262Stut {
13712262Stut 	char str[BUF], buff[BUF*8];	/* for tempfile & databases */
13832508Sbostic 	char cmd[80];			/* for using system sort command */
13912262Stut 	long int offset;
14012262Stut 	int i, length;
14112262Stut 
14212262Stut 	/* when sorting, ignore case distinctions; tab char is ':' */
14312262Stut 	sprintf(cmd, "sort -ft: +1 %s -o %s", tempfile, tempfile);
14412262Stut 	if (system(cmd) == 127)
14512262Stut 		error("sortbib");
14612262Stut 	tfp = fopen(tempfile, "r");
14712262Stut 	while (fgets(str, sizeof(str), tfp))
14812262Stut 	{
14912262Stut 		/* get file pointer, record offset, and length */
15012262Stut 		if (sscanf(str, "%d %D %d :", &i, &offset, &length) != 3)
15112262Stut 			error("sortbib: sorting error");
15212262Stut 		/* seek to proper disk location in proper file */
15312262Stut 		if (fseek(fp[i], offset, 0) == -1)
15412262Stut 			error("sortbib");
15512262Stut 		/* read exactly one record from bibliography */
15612262Stut 		if (fread(buff, sizeof(*buff), length, fp[i]) == 0)
15712262Stut 			error("sortbib");
15812262Stut 		/* add newline between unseparated records */
15912262Stut 		if (buff[0] != '\n' && rsmode == 1)
16012262Stut 			putchar('\n');
16112262Stut 		/* write record buffer to standard output */
16212262Stut 		if (fwrite(buff, sizeof(*buff), length, stdout) == 0)
16312262Stut 			error("sortbib");
16412262Stut 	}
16512262Stut }
16612262Stut 
16712262Stut parse(line, fld)	/* get fields out of line, prepare for sorting */
16812262Stut char line[];
16912262Stut char fld[][BUF];
17012262Stut {
17112262Stut 	char wd[8][BUF/4], *strcat();
17212262Stut 	int n, i, j;
17312262Stut 
17412262Stut 	for (i = 0; i < 8; i++)		/* zap out old strings */
17512262Stut 		*wd[i] = NULL;
17612262Stut 	n = sscanf(line, "%s %s %s %s %s %s %s %s",
17712262Stut 		wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]);
17812262Stut 	for (i = 0; i < 4; i++)
17912262Stut 	{
18012262Stut 		if (wd[0][1] == keystr[i])
18112262Stut 		{
18212262Stut 			if (wd[0][1] == 'A')
18312262Stut 			{
18412262Stut 				if (oneauth && !multauth)	/* no repeat */
18512262Stut 					break;
18612262Stut 				else if (oneauth)		/* mult auths */
18712262Stut 					strcat(fld[i], "~~");
18812262Stut 				if (!endcomma(wd[n-2]))		/* surname */
18912262Stut 					strcat(fld[i], wd[n-1]);
19012262Stut 				else {				/* jr. or ed. */
19112262Stut 					strcat(fld[i], wd[n-2]);
19212262Stut 					n--;
19312262Stut 				}
19412262Stut 				strcat(fld[i], " ");
19512262Stut 				for (j = 1; j < n-1; j++)
19612262Stut 					strcat(fld[i], wd[j]);
19712262Stut 				oneauth = 1;
19812262Stut 			}
19912262Stut 			else if (wd[0][1] == 'D')
20012262Stut 			{
20112262Stut 				strcat(fld[i], wd[n-1]);	/* year */
20212262Stut 				if (n > 2)
20312262Stut 					strcat(fld[i], wd[1]);	/* month */
20412262Stut 			}
20512262Stut 			else if (wd[0][1] == 'T' || wd[0][1] == 'J')
20612262Stut 			{
20712262Stut 				j = 1;
20812262Stut 				if (article(wd[1]))	/* skip article */
20912262Stut 					j++;
21012262Stut 				for (; j < n; j++)
21112262Stut 					strcat(fld[i], wd[j]);
21212262Stut 			}
21312262Stut 			else  /* any other field */
21412262Stut 				for (j = 1; j < n; j++)
21512262Stut 					strcat(fld[i], wd[j]);
21612262Stut 		}
21712262Stut 		/* %Q quorporate or queer author - unreversed %A */
21812262Stut 		else if (wd[0][1] == 'Q' && keystr[i] == 'A')
21912262Stut 			for (j = 1; j < n; j++)
22012262Stut 				strcat(fld[i], wd[j]);
22112262Stut 	}
22212262Stut }
22312262Stut 
/*
 * See if a word is an article that should be skipped when sorting.
 *
 * str	the word to test; the comparison is exact and case-sensitive
 *
 * Returns 1 if str is one of the listed articles, 0 otherwise (including
 * when str is a null pointer).
 */
int
article(char *str)
{
	static const char *const articles[] = {
		"The", "A", "An",		/* English */
		"Le", "La",			/* French */
		"Der", "Die", "Das",		/* German */
		"El",				/* Spanish */
		"Den",				/* Scandinavian */
	};
	size_t k;

	if (str == NULL)
		return (0);
	for (k = 0; k < sizeof(articles) / sizeof(articles[0]); k++)
		if (strcmp(articles[k], str) == 0)
			return (1);
	return (0);
}
24912262Stut 
25012262Stut eval(keystr)		/* evaluate key string for A+ marking */
25112262Stut char keystr[];
25212262Stut {
25312262Stut 	int i, j;
25412262Stut 
25512262Stut 	for (i = 0, j = 0; keystr[i]; i++, j++)
25612262Stut 	{
25712262Stut 		if (keystr[i] == '+')
25812262Stut 		{
25912262Stut 			multauth = 1;
26012262Stut 			i++;
26112262Stut 		}
26212262Stut 		keystr[j] = keystr[i];
26312262Stut 	}
26412262Stut 	keystr[j] = NULL;
26512262Stut }
26612262Stut 
26712262Stut error(s)		/* exit in case of various system errors */
26812262Stut char *s;
26912262Stut {
27012262Stut 	perror(s);
27112262Stut 	exit(1);
27212262Stut }
27312262Stut 
27446849Sbostic void
27512262Stut onintr()		/* remove tempfile in case of interrupt */
27612262Stut {
27712262Stut 	fprintf(stderr, "\nInterrupt\n");
27812262Stut 	unlink(tempfile);
27912262Stut 	exit(1);
28012262Stut }
28112262Stut 
/*
 * See if a word ends in a comma, and strip that comma if so.
 *
 * str	the word to test; modified in place when it ends in ','
 *
 * Returns 1 if a trailing comma was removed, 0 otherwise.  An empty
 * string (or a null pointer) has no last character to look at and
 * yields 0 instead of an access in front of the buffer.
 */
int
endcomma(char *str)
{
	size_t len;

	if (str == NULL || (len = strlen(str)) == 0)
		return (0);
	if (str[len-1] != ',')
		return (0);
	str[len-1] = '\0';
	return (1);
}
295