#ifndef lint
static char *sccsid = "@(#)sortbib.c 4.1 (Berkeley) 05/06/83";
#endif

/*
 * sortbib: sort the records of one or more bibliographic databases.
 *
 * Pass 1 (sortbib) scans every database and writes one "key line" per
 * record to a temporary file:
 *
 *	file-index  byte-offset  length : field1 field2 field3 field4
 *
 * Pass 2 (deliver) sorts that file with sort(1) on the text after the
 * colon, then seeks to each record in turn and copies it to standard
 * output.  The databases themselves are only read, never rewritten.
 */

#include <ctype.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define BUF	BUFSIZ
#define MXFILES	16
#define MXKEYS	4		/* at most this many sort fields */
#define MXWORDS	8		/* words examined on one field line */
#define WDLEN	(BUF/4)		/* room for one word, including the NUL */

/*
 * Declared explicitly so the file also builds when <stdlib.h> hides POSIX
 * names (strict ISO C mode); it agrees with the POSIX prototype.
 */
int mkstemp(char *);

char *tempfile;			/* temporary file for sorting keys */
char *keystr = "AD";		/* default sorting on author and date */
int multauth = 0;		/* by default sort on senior author only */
int oneauth;			/* has there been author in the record? */
int rsmode = 0;			/* record separator: 1 = null line, 2 = bracket */

void sortbib(FILE *fp, FILE *tfp, int i);
void deliver(FILE *fp[], FILE *tfp);
void parse(char line[], char fld[][BUF]);
int article(const char *str);
void eval(char keystr[]);
void error(const char *s);
void onintr(int sig);
int endcomma(char *str);

/*
 * Usage: sortbib [-sKEYS] database [...]
 * Exits 0 on success, 1 on any usage or I/O error.
 */
int
main(int argc, char *argv[])
{
	/* mkstemp() rewrites the template, so it must be a writable array
	 * (not a string literal) and must end in exactly six X's. */
	static char tmpname[] = "/tmp/SbibXXXXXX";
	FILE *fp[MXFILES] = { NULL };	/* NULL marks slots deliver() must reject */
	FILE *tfp;
	int i, fd;

	if (argc == 1) {		/* can't use stdin for seeking anyway */
		puts("Usage: sortbib [-sKEYS] database [...]");
		puts("\t-s: sort by fields in KEYS (default is AD)");
		exit(1);
	}
	if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's') {
		keystr = argv[1] + 2;
		eval(keystr);		/* evaluate A+ for multiple authors */
		argv++;
		argc--;
	}
	if (argc > MXFILES + 1) {	/* too many open file streams */
		fprintf(stderr,
		    "sortbib: More than %d databases specified\n", MXFILES);
		exit(1);
	}
	for (i = 1; i < argc; i++)	/* open files in arg list */
		if ((fp[i - 1] = fopen(argv[i], "r")) == NULL)
			error(argv[i]);

	/* Create the key file atomically; it is reopened by name below
	 * because sort(1) also needs to reach it by name. */
	if ((fd = mkstemp(tmpname)) == -1)
		error(tmpname);
	tempfile = tmpname;
	close(fd);

	if (signal(SIGINT, SIG_IGN) != SIG_IGN)	/* remove if interrupted */
		signal(SIGINT, onintr);
	if ((tfp = fopen(tempfile, "w")) == NULL)
		error(tempfile);
	for (i = 0; i < argc - 1; i++)	/* read keys from bib files */
		sortbib(fp[i], tfp, i);
	if (fclose(tfp) == EOF)		/* catches a failed final flush */
		error(tempfile);
	/* deliver() opens the key file itself; the closed stream must not
	 * be handed on. */
	deliver(fp, NULL);
	unlink(tempfile);
	exit(0);
}

/* Report an over-long record, remove the key file and exit. */
static void
toolong(int recno, long length)
{
	fprintf(stderr, "sortbib: record %d longer than %d (%ld)\n",
	    recno, BUF * 8, length);
	if (tempfile != NULL)
		unlink(tempfile);
	exit(1);
}

/* Write one key line for the record at [offset, offset+length) of file i. */
static void
putkey(FILE *tfp, int i, long offset, int length, char fld[][BUF])
{
	fprintf(tfp, "%d %ld %d : %s %s %s %s\n",
	    i, offset, length, fld[0], fld[1], fld[2], fld[3]);
	if (ferror(tfp))
		error(tempfile);	/* disk error in /tmp */
}

/*
 * Read records from database fp (number i on the command line) and write
 * one key line per record to tfp.
 *
 * A record starts at the first line of the file and then at every empty
 * line (rsmode 1) or every ".[" line (rsmode 2); whichever separator is
 * seen first fixes rsmode for the rest of the run.  Lines starting with
 * '%' are handed to parse() to collect the sort fields.
 */
void
sortbib(FILE *fp, FILE *tfp, int i)
{
	long offset, lastoffset = 0;	/* byte offsets in file */
	long length;			/* record length */
	int newrec = 0, recno = 0;	/* new rec'd?, number */
	char line[BUF], fld[MXKEYS][BUF]; /* one line, the sort fields */
	int k;

	line[0] = '\0';
	for (k = 0; k < MXKEYS; k++)
		fld[k][0] = '\0';

	/* measure byte offset, then get new line */
	while (offset = ftell(fp), fgets(line, BUF, fp) != NULL) {
		if (recno == 0)		/* accept record w/o initial newline */
			newrec = 1;
		if (line[0] == '\n') {	/* accept null line record separator */
			if (!rsmode)
				rsmode = 1;	/* null line mode */
			if (rsmode == 1)
				newrec = 1;
		}
		if (line[0] == '.' && line[1] == '[') {	/* also accept .[ .] */
			if (!rsmode)
				rsmode = 2;	/* bracket pair mode */
			if (rsmode == 2)
				newrec = 1;
		}
		if (newrec) {		/* by whatever means above */
			newrec = 0;
			length = offset - lastoffset;	/* measure rec len */
			if (length > BUF * 8)
				toolong(recno, length);
			if (recno++)	/* nothing to emit before record 1 */
				putkey(tfp, i, lastoffset, (int)length, fld);
			for (k = 0; k < MXKEYS; k++)
				fld[k][0] = '\0';
			oneauth = 0;		/* reset number of authors */
			lastoffset = offset;	/* save for next time */
		}
		if (line[0] == '%')	/* parse out fields to be sorted */
			parse(line, fld);
	}
	if (recno == 0)			/* empty file: no records at all */
		return;

	offset = ftell(fp);		/* measure byte offset at EOF */
	length = offset - lastoffset;	/* measure final record length */
	if (length > BUF * 8)
		toolong(recno, length);
	if (line[0] != '\n')		/* ignore null line just before EOF */
		putkey(tfp, i, lastoffset, (int)length, fld);
}

/*
 * Sort the key file and copy the records to standard output in key order.
 *
 * fp[]  the open databases, indexed by the file number in each key line;
 *       unused slots must be NULL.
 * tfp   ignored on entry; the key file is reopened by name here.
 */
void
deliver(FILE *fp[], FILE *tfp)
{
	char str[BUF], buff[BUF * 8];	/* for tempfile & databases */
	char cmd[80];			/* for using system sort command */
	long offset;
	int i, length, n, c;

	/*
	 * When sorting, ignore case distinctions; field separator is ':'.
	 * "-k 2" (key = field 2 to end of line) is the current spelling of
	 * the obsolescent "+1", which sort(1) may now take for a file name.
	 */
	n = snprintf(cmd, sizeof cmd, "sort -f -t: -k 2 -o %s %s",
	    tempfile, tempfile);
	if (n < 0 || (size_t)n >= sizeof cmd || system(cmd) != 0) {
		fprintf(stderr, "sortbib: sort command failed\n");
		unlink(tempfile);
		exit(1);
	}
	if ((tfp = fopen(tempfile, "r")) == NULL)
		error(tempfile);
	while (fgets(str, sizeof str, tfp) != NULL) {
		/* The key text can exceed str; the numbers needed are at the
		 * front, so discard the remainder of an over-long line. */
		if (strchr(str, '\n') == NULL)
			while ((c = getc(tfp)) != EOF && c != '\n')
				;
		/* get file pointer, record offset, and length */
		if (sscanf(str, "%d %ld %d :", &i, &offset, &length) != 3)
			error("sortbib: sorting error");
		/* never index fp[] or fill buff from unchecked numbers */
		if (i < 0 || i >= MXFILES || fp[i] == NULL ||
		    offset < 0 || length <= 0 || length > (int)sizeof buff)
			error("sortbib: sorting error");
		/* seek to proper disk location in proper file */
		if (fseek(fp[i], offset, SEEK_SET) == -1)
			error("sortbib");
		/* read exactly one record from bibliography */
		if (fread(buff, 1, (size_t)length, fp[i]) != (size_t)length)
			error("sortbib");
		/* add newline between unseparated records */
		if (buff[0] != '\n' && rsmode == 1)
			putchar('\n');
		/* write record buffer to standard output */
		if (fwrite(buff, 1, (size_t)length, stdout) != (size_t)length)
			error("sortbib");
	}
	fclose(tfp);
}

/*
 * Split line into at most MXWORDS whitespace-separated words, each
 * truncated to fit wd[k].  Returns the number of words stored.
 */
static int
splitwords(const char *line, char wd[][WDLEN])
{
	int n = 0;

	while (n < MXWORDS) {
		size_t len = 0;

		while (isspace((unsigned char)*line))
			line++;
		if (*line == '\0')
			break;
		while (*line != '\0' && !isspace((unsigned char)*line)) {
			if (len < WDLEN - 1)
				wd[n][len++] = *line;
			line++;
		}
		wd[n][len] = '\0';
		n++;
	}
	return n;
}

/* Append src to the BUF-sized field dst, truncating rather than overflowing. */
static void
append(char *dst, const char *src)
{
	size_t used = strlen(dst);

	if (used < (size_t)BUF - 1)
		snprintf(dst + used, (size_t)BUF - used, "%s", src);
}

/*
 * Get the sort fields out of one '%' line.
 *
 * line  a database line whose first word is "%X" for some field letter X.
 * fld   the MXKEYS sort fields of the current record; fld[i] collects the
 *       text for key letter keystr[i].
 *
 * %A lines are stored surname first; with oneauth set and multauth clear
 * further %A lines are ignored, otherwise they are joined with "~~".
 * %D stores the last word and then, if there are more words, the first.
 * %T and %J drop a leading article.  %Q is taken as an unreversed author.
 * Words are concatenated without separators.
 */
void
parse(char line[], char fld[][BUF])
{
	char wd[MXWORDS][WDLEN];
	char tag;
	int n, i, j;

	for (i = 0; i < MXWORDS; i++)	/* zap out old strings */
		wd[i][0] = '\0';
	n = splitwords(line, wd);
	if (n < 2)			/* field letter with no value */
		return;
	tag = wd[0][1];

	/* stop at the end of keystr: it may name fewer than MXKEYS fields */
	for (i = 0; i < MXKEYS && keystr[i] != '\0'; i++) {
		if (tag == keystr[i]) {
			if (tag == 'A') {
				if (oneauth && !multauth)	/* no repeat */
					break;
				if (n < 2)	/* n shrinks below; keep wd[n-2] valid */
					break;
				if (oneauth)			/* mult auths */
					append(fld[i], "~~");
				if (!endcomma(wd[n - 2]))	/* surname */
					append(fld[i], wd[n - 1]);
				else {				/* jr. or ed. */
					append(fld[i], wd[n - 2]);
					n--;
				}
				append(fld[i], " ");
				for (j = 1; j < n - 1; j++)
					append(fld[i], wd[j]);
				oneauth = 1;
			} else if (tag == 'D') {
				append(fld[i], wd[n - 1]);	/* year */
				if (n > 2)
					append(fld[i], wd[1]);	/* month */
			} else if (tag == 'T' || tag == 'J') {
				j = 1;
				if (article(wd[1]))	/* skip article */
					j++;
				for (; j < n; j++)
					append(fld[i], wd[j]);
			} else {			/* any other field */
				for (j = 1; j < n; j++)
					append(fld[i], wd[j]);
			}
		}
		/* %Q quorporate or queer author - unreversed %A */
		else if (tag == 'Q' && keystr[i] == 'A') {
			for (j = 1; j < n; j++)
				append(fld[i], wd[j]);
		}
	}
}

/* Return 1 if str is exactly one of the known articles, else 0. */
int
article(const char *str)
{
	static const char *const articles[] = {
		"The", "A", "An",		/* English */
		"Le", "La",			/* French */
		"Der", "Die", "Das",		/* German */
		"El",				/* Spanish */
		"Den"				/* Scandinavian */
	};
	size_t k;

	for (k = 0; k < sizeof articles / sizeof articles[0]; k++)
		if (strcmp(articles[k], str) == 0)
			return 1;
	return 0;
}

/*
 * Evaluate key string for A+ marking: every '+' is removed from keystr in
 * place, and multauth is set if at least one was present.
 */
void
eval(char keystr[])
{
	int i, j;

	for (i = 0, j = 0; keystr[i] != '\0'; i++) {
		if (keystr[i] == '+') {
			/* drop the mark only; never step over the NUL */
			multauth = 1;
			continue;
		}
		keystr[j++] = keystr[i];
	}
	keystr[j] = '\0';
}

/*
 * Exit in case of various system errors: print s with the errno text,
 * remove the key file if one has been created, and exit with status 1.
 */
void
error(const char *s)
{
	perror(s);		/* first, before anything can disturb errno */
	if (tempfile != NULL)
		unlink(tempfile);
	exit(1);
}

/*
 * SIGINT handler: remove tempfile and terminate.  Only async-signal-safe
 * calls are used, so stdio buffers are not flushed on this path.
 */
void
onintr(int sig)
{
	static const char msg[] = "\nInterrupt\n";

	(void)sig;
	if (write(STDERR_FILENO, msg, sizeof msg - 1) < 0) {
		/* nothing useful can be done about it here */
	}
	if (tempfile != NULL)
		unlink(tempfile);
	_exit(1);
}

/*
 * If str ends in a comma, remove the comma and return 1; otherwise leave
 * str alone and return 0.  An empty string returns 0.
 */
int
endcomma(char *str)
{
	size_t len = strlen(str);

	if (len > 0 && str[len - 1] == ',') {
		str[len - 1] = '\0';
		return 1;
	}
	return 0;
}