xref: /csrg-svn/old/refer/sortbib/sortbib.c (revision 12262)
#ifndef lint
/* SCCS-style version identification string; left out when lint is defined */
static char sccsid[] = "@(#)sortbib.c	4.1 (Berkeley) 05/06/83";
#endif
4*12262Stut 
5*12262Stut #include <stdio.h>
6*12262Stut #include <signal.h>
/* size of one line buffer; a whole record may occupy up to BUF*8 bytes */
#define BUF	BUFSIZ
/* largest number of database files accepted on the command line */
#define MXFILES	16

/* name of the temporary file that holds the sorting keys */
char *tempfile = NULL;
/* letters of the fields to sort on; the default is author, then date */
char *keystr = "AD";
/* set by eval() when the key string has a '+': sort on all the authors */
int multauth = 0;
/* has an author already been seen in the current record? */
int oneauth = 0;
14*12262Stut 
15*12262Stut main(argc, argv)	/* sortbib: sort bibliographic database in place */
16*12262Stut int argc;
17*12262Stut char *argv[];
18*12262Stut {
19*12262Stut 	FILE *fp[MXFILES], *tfp, *fopen();
20*12262Stut 	int i, onintr();
21*12262Stut 	char *mktemp();
22*12262Stut 
23*12262Stut 	if (argc == 1)		/* can't use stdin for seeking anyway */
24*12262Stut 	{
25*12262Stut 		puts("Usage:  sortbib [-sKEYS] database [...]");
26*12262Stut 		puts("\t-s: sort by fields in KEYS (default is AD)");
27*12262Stut 		exit(1);
28*12262Stut 	}
29*12262Stut 	if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's')
30*12262Stut 	{
31*12262Stut 		keystr = argv[1]+2;
32*12262Stut 		eval(keystr);		/* evaluate A+ for multiple authors */
33*12262Stut 		argv++; argc--;
34*12262Stut 	}
35*12262Stut 	if (argc > MXFILES+1)	/* too many open file streams */
36*12262Stut 	{
37*12262Stut 		fprintf(stderr,
38*12262Stut 		"sortbib: More than %d databases specified\n", MXFILES);
39*12262Stut 		exit(1);
40*12262Stut 	}
41*12262Stut 	for (i = 1; i < argc; i++)		/* open files in arg list */
42*12262Stut 		if ((fp[i-1] = fopen(argv[i], "r")) == NULL)
43*12262Stut 			error(argv[i]);
44*12262Stut 	tempfile = "/tmp/SbibXXXXX";		/* tempfile for sorting keys */
45*12262Stut 	mktemp(tempfile);
46*12262Stut 	if (signal(SIGINT,SIG_IGN) != SIG_IGN)	/* remove if interrupted */
47*12262Stut 		signal(SIGINT, onintr);
48*12262Stut 	if ((tfp = fopen(tempfile, "w")) == NULL)
49*12262Stut 		error(tempfile);
50*12262Stut 	for (i = 0; i < argc-1; i++)		/* read keys from bib files */
51*12262Stut 		sortbib(fp[i], tfp, i);
52*12262Stut 	fclose(tfp);
53*12262Stut 	deliver(fp, tfp);	/* do disk seeks and read from biblio files */
54*12262Stut 	unlink(tempfile);
55*12262Stut 	exit(0);
56*12262Stut }
57*12262Stut 
58*12262Stut int rsmode = 0;		/* record separator: 1 = null line, 2 = bracket */
59*12262Stut 
60*12262Stut sortbib(fp, tfp, i)	/* read records, prepare list for sorting */
61*12262Stut FILE *fp, *tfp;
62*12262Stut int i;
63*12262Stut {
64*12262Stut 	long offset, lastoffset = 0, ftell();	/* byte offsets in file */
65*12262Stut 	int length, newrec, recno = 0;		/* reclen, new rec'd?, number */
66*12262Stut 	char line[BUF], fld[4][BUF];		/* one line, the sort fields */
67*12262Stut 
68*12262Stut 	/* measure byte offset, then get new line */
69*12262Stut 	while (offset = ftell(fp), fgets(line, BUF, fp))
70*12262Stut 	{
71*12262Stut 		if (recno == 0)		/* accept record w/o initial newline */
72*12262Stut 			newrec = 1;
73*12262Stut 		if (line[0] == '\n')	/* accept null line record separator */
74*12262Stut 		{
75*12262Stut 			if (!rsmode)
76*12262Stut 				rsmode = 1;	/* null line mode */
77*12262Stut 			if (rsmode == 1)
78*12262Stut 				newrec = 1;
79*12262Stut 		}
80*12262Stut 		if (line[0] == '.' && line[1] == '[')	/* also accept .[ .] */
81*12262Stut 		{
82*12262Stut 			if (!rsmode)
83*12262Stut 				rsmode = 2;	/* bracket pair mode */
84*12262Stut 			if (rsmode == 2)
85*12262Stut 				newrec = 1;
86*12262Stut 		}
87*12262Stut 		if (newrec)		/* by whatever means above */
88*12262Stut 		{
89*12262Stut 			newrec = 0;
90*12262Stut 			length = offset - lastoffset;	/* measure rec len */
91*12262Stut 			if (length > BUF*8)
92*12262Stut 			{
93*12262Stut 				fprintf(stderr,
94*12262Stut 				"sortbib: record %d longer than %d (%d)\n",
95*12262Stut 					recno, BUF*8, length);
96*12262Stut 				exit(1);
97*12262Stut 			}
98*12262Stut 			if (recno++)			/* info for sorting */
99*12262Stut 			{
100*12262Stut 				fprintf(tfp, "%d %D %d : %s %s %s %s\n",
101*12262Stut 					i, lastoffset, length,
102*12262Stut 					fld[0], fld[1], fld[2], fld[3]);
103*12262Stut 				if (ferror(tfp))
104*12262Stut 					error(tempfile);
105*12262Stut 			}
106*12262Stut 			*fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL;
107*12262Stut 			oneauth = 0;		/* reset number of authors */
108*12262Stut 			lastoffset = offset;	/* save for next time */
109*12262Stut 		}
110*12262Stut 		if (line[0] == '%')	/* parse out fields to be sorted */
111*12262Stut 			parse(line, fld);
112*12262Stut 	}
113*12262Stut 	offset = ftell(fp);		/* measure byte offset at EOF */
114*12262Stut 	length = offset - lastoffset;	/* measure final record length */
115*12262Stut 	if (length > BUF*8)
116*12262Stut 	{
117*12262Stut 		fprintf(stderr, "sortbib: record %d longer than %d (%d)\n",
118*12262Stut 			recno, BUF*8, length);
119*12262Stut 		exit(1);
120*12262Stut 	}
121*12262Stut 	if (line[0] != '\n')		/* ignore null line just before EOF */
122*12262Stut 	{
123*12262Stut 		fprintf(tfp, "%d %D %d : %s %s %s %s\n",
124*12262Stut 			i, lastoffset, length,
125*12262Stut 			fld[0], fld[1], fld[2], fld[3]);
126*12262Stut 		if (ferror(tfp))
127*12262Stut 			error(tempfile);	/* disk error in /tmp */
128*12262Stut 	}
129*12262Stut }
130*12262Stut 
131*12262Stut deliver(fp, tfp)	/* deliver sorted entries out of database(s) */
132*12262Stut FILE *fp[], *tfp;
133*12262Stut {
134*12262Stut 	char str[BUF], buff[BUF*8];	/* for tempfile & databases */
135*12262Stut 	char cmd[80], *sprintf();	/* for using system sort command */
136*12262Stut 	long int offset;
137*12262Stut 	int i, length;
138*12262Stut 
139*12262Stut 	/* when sorting, ignore case distinctions; tab char is ':' */
140*12262Stut 	sprintf(cmd, "sort -ft: +1 %s -o %s", tempfile, tempfile);
141*12262Stut 	if (system(cmd) == 127)
142*12262Stut 		error("sortbib");
143*12262Stut 	tfp = fopen(tempfile, "r");
144*12262Stut 	while (fgets(str, sizeof(str), tfp))
145*12262Stut 	{
146*12262Stut 		/* get file pointer, record offset, and length */
147*12262Stut 		if (sscanf(str, "%d %D %d :", &i, &offset, &length) != 3)
148*12262Stut 			error("sortbib: sorting error");
149*12262Stut 		/* seek to proper disk location in proper file */
150*12262Stut 		if (fseek(fp[i], offset, 0) == -1)
151*12262Stut 			error("sortbib");
152*12262Stut 		/* read exactly one record from bibliography */
153*12262Stut 		if (fread(buff, sizeof(*buff), length, fp[i]) == 0)
154*12262Stut 			error("sortbib");
155*12262Stut 		/* add newline between unseparated records */
156*12262Stut 		if (buff[0] != '\n' && rsmode == 1)
157*12262Stut 			putchar('\n');
158*12262Stut 		/* write record buffer to standard output */
159*12262Stut 		if (fwrite(buff, sizeof(*buff), length, stdout) == 0)
160*12262Stut 			error("sortbib");
161*12262Stut 	}
162*12262Stut }
163*12262Stut 
164*12262Stut parse(line, fld)	/* get fields out of line, prepare for sorting */
165*12262Stut char line[];
166*12262Stut char fld[][BUF];
167*12262Stut {
168*12262Stut 	char wd[8][BUF/4], *strcat();
169*12262Stut 	int n, i, j;
170*12262Stut 
171*12262Stut 	for (i = 0; i < 8; i++)		/* zap out old strings */
172*12262Stut 		*wd[i] = NULL;
173*12262Stut 	n = sscanf(line, "%s %s %s %s %s %s %s %s",
174*12262Stut 		wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]);
175*12262Stut 	for (i = 0; i < 4; i++)
176*12262Stut 	{
177*12262Stut 		if (wd[0][1] == keystr[i])
178*12262Stut 		{
179*12262Stut 			if (wd[0][1] == 'A')
180*12262Stut 			{
181*12262Stut 				if (oneauth && !multauth)	/* no repeat */
182*12262Stut 					break;
183*12262Stut 				else if (oneauth)		/* mult auths */
184*12262Stut 					strcat(fld[i], "~~");
185*12262Stut 				if (!endcomma(wd[n-2]))		/* surname */
186*12262Stut 					strcat(fld[i], wd[n-1]);
187*12262Stut 				else {				/* jr. or ed. */
188*12262Stut 					strcat(fld[i], wd[n-2]);
189*12262Stut 					n--;
190*12262Stut 				}
191*12262Stut 				strcat(fld[i], " ");
192*12262Stut 				for (j = 1; j < n-1; j++)
193*12262Stut 					strcat(fld[i], wd[j]);
194*12262Stut 				oneauth = 1;
195*12262Stut 			}
196*12262Stut 			else if (wd[0][1] == 'D')
197*12262Stut 			{
198*12262Stut 				strcat(fld[i], wd[n-1]);	/* year */
199*12262Stut 				if (n > 2)
200*12262Stut 					strcat(fld[i], wd[1]);	/* month */
201*12262Stut 			}
202*12262Stut 			else if (wd[0][1] == 'T' || wd[0][1] == 'J')
203*12262Stut 			{
204*12262Stut 				j = 1;
205*12262Stut 				if (article(wd[1]))	/* skip article */
206*12262Stut 					j++;
207*12262Stut 				for (; j < n; j++)
208*12262Stut 					strcat(fld[i], wd[j]);
209*12262Stut 			}
210*12262Stut 			else  /* any other field */
211*12262Stut 				for (j = 1; j < n; j++)
212*12262Stut 					strcat(fld[i], wd[j]);
213*12262Stut 		}
214*12262Stut 		/* %Q quorporate or queer author - unreversed %A */
215*12262Stut 		else if (wd[0][1] == 'Q' && keystr[i] == 'A')
216*12262Stut 			for (j = 1; j < n; j++)
217*12262Stut 				strcat(fld[i], wd[j]);
218*12262Stut 	}
219*12262Stut }
220*12262Stut 
/*
 * article: tell whether str is, exactly and with this capitalization,
 * one of the articles that are skipped at the start of a title.
 * Returns 1 if so, 0 if not.
 */
int
article(str)
char *str;
{
	static char *known[] = {
		"The", "A", "An",	/* English */
		"Le", "La",		/* French */
		"Der", "Die", "Das",	/* German */
		"El",			/* Spanish */
		"Den",			/* Scandinavian */
		0			/* end of list */
	};
	char **ap;

	for (ap = known; *ap != 0; ap++)
		if (strcmp(*ap, str) == 0)
			return(1);
	return(0);
}
246*12262Stut 
247*12262Stut eval(keystr)		/* evaluate key string for A+ marking */
248*12262Stut char keystr[];
249*12262Stut {
250*12262Stut 	int i, j;
251*12262Stut 
252*12262Stut 	for (i = 0, j = 0; keystr[i]; i++, j++)
253*12262Stut 	{
254*12262Stut 		if (keystr[i] == '+')
255*12262Stut 		{
256*12262Stut 			multauth = 1;
257*12262Stut 			i++;
258*12262Stut 		}
259*12262Stut 		keystr[j] = keystr[i];
260*12262Stut 	}
261*12262Stut 	keystr[j] = NULL;
262*12262Stut }
263*12262Stut 
264*12262Stut error(s)		/* exit in case of various system errors */
265*12262Stut char *s;
266*12262Stut {
267*12262Stut 	perror(s);
268*12262Stut 	exit(1);
269*12262Stut }
270*12262Stut 
271*12262Stut onintr()		/* remove tempfile in case of interrupt */
272*12262Stut {
273*12262Stut 	fprintf(stderr, "\nInterrupt\n");
274*12262Stut 	unlink(tempfile);
275*12262Stut 	exit(1);
276*12262Stut }
277*12262Stut 
/*
 * endcomma: if str ends in a comma, remove the comma and return 1;
 * otherwise leave str alone and return 0.  An empty string has no last
 * character and is reported as 0.
 */
int
endcomma(str)
char *str;
{
	int n;

	n = strlen(str);
	if (n > 0 && str[n-1] == ',')
	{
		str[n-1] = '\0';
		return(1);
	}
	return(0);
}
291