1*60905Sbostic /*- 2*60905Sbostic * Copyright (c) 1993 The Regents of the University of California. 3*60905Sbostic * All rights reserved. 4*60905Sbostic * 5*60905Sbostic * This code is derived from software contributed to Berkeley by 6*60905Sbostic * Peter McIlroy. 7*60905Sbostic * 8*60905Sbostic * %sccs.include.redist.c% 9*60905Sbostic */ 10*60905Sbostic 11*60905Sbostic #ifndef lint 12*60905Sbostic static char sccsid[] = "@(#)fsort.c 5.1 (Berkeley) 06/01/93"; 13*60905Sbostic #endif /* not lint */ 14*60905Sbostic 15*60905Sbostic /* 16*60905Sbostic * Read in the next bin. If it fits in one segment sort it; 17*60905Sbostic * otherwise refine it by segment deeper by one character, 18*60905Sbostic * and try again on smaller bins. Sort the final bin at this level 19*60905Sbostic * of recursion to keep the head of fstack at 0. 20*60905Sbostic * After PANIC passes, abort to merge sort. 21*60905Sbostic */ 22*60905Sbostic #include "sort.h" 23*60905Sbostic #include "fsort.h" 24*60905Sbostic 25*60905Sbostic #include <stdlib.h> 26*60905Sbostic #include <string.h> 27*60905Sbostic 28*60905Sbostic u_char **keylist = 0, *buffer = 0, *linebuf = 0; 29*60905Sbostic struct tempfile fstack[MAXFCT]; 30*60905Sbostic extern char *toutpath; 31*60905Sbostic #define FSORTMAX 4 32*60905Sbostic int PANIC = FSORTMAX; 33*60905Sbostic 34*60905Sbostic void 35*60905Sbostic fsort(binno, depth, infiles, nfiles, outfd, ftbl) 36*60905Sbostic register int binno, depth, nfiles; 37*60905Sbostic register union f_handle infiles; 38*60905Sbostic FILE *outfd; 39*60905Sbostic register struct field *ftbl; 40*60905Sbostic { 41*60905Sbostic register u_char *bufend, **keypos, *tmpbuf; 42*60905Sbostic u_char *weights; 43*60905Sbostic int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0; 44*60905Sbostic register int c, nelem; 45*60905Sbostic long sizes [NBINS+1]; 46*60905Sbostic union f_handle tfiles, mstart = {MAXFCT-16}; 47*60905Sbostic register int (*get)(int, union f_handle, int, RECHEADER *, 48*60905Sbostic u_char *, struct field *); 49*60905Sbostic register struct recheader *crec; 50*60905Sbostic struct field tfield[2]; 51*60905Sbostic FILE *prevfd, *tailfd[FSORTMAX+1]; 52*60905Sbostic 53*60905Sbostic memset(tailfd, 0, sizeof(tailfd)); 54*60905Sbostic prevfd = outfd; 55*60905Sbostic memset(tfield, 0, sizeof(tfield)); 56*60905Sbostic if (ftbl[0].flags & R) 57*60905Sbostic tfield[0].weights = Rascii; 58*60905Sbostic else 59*60905Sbostic tfield[0].weights = ascii; 60*60905Sbostic tfield[0].icol.num = 1; 61*60905Sbostic weights = ftbl[0].weights; 62*60905Sbostic if (!buffer) { 63*60905Sbostic buffer = malloc(BUFSIZE); 64*60905Sbostic keylist = malloc(MAXNUM * sizeof(u_char *)); 65*60905Sbostic if (!SINGL_FLD) 66*60905Sbostic linebuf = malloc(MAXLLEN); 67*60905Sbostic } 68*60905Sbostic bufend = buffer + BUFSIZE; 69*60905Sbostic if (binno >= 0) { 70*60905Sbostic tfiles.top = infiles.top + nfiles; 71*60905Sbostic get = getnext; 72*60905Sbostic } else { 73*60905Sbostic tfiles.top = 0; 74*60905Sbostic if (SINGL_FLD) 75*60905Sbostic get = makeline; 76*60905Sbostic else 77*60905Sbostic get = makekey; 78*60905Sbostic } 79*60905Sbostic for (;;) { 80*60905Sbostic memset(sizes, 0, sizeof(sizes)); 81*60905Sbostic c = ntfiles = 0; 82*60905Sbostic if (binno == weights[REC_D] && 83*60905Sbostic !(SINGL_FLD && ftbl[0].flags & F)) { /* pop */ 84*60905Sbostic rd_append(weights[REC_D], 85*60905Sbostic infiles, nfiles, prevfd, buffer, bufend); 86*60905Sbostic break; 87*60905Sbostic } else if (binno == weights[REC_D]) { 88*60905Sbostic depth = 0; /* start over on flat weights */ 89*60905Sbostic ftbl = tfield; 90*60905Sbostic weights = ftbl[0].weights; 91*60905Sbostic } 92*60905Sbostic while (c != EOF) { 93*60905Sbostic keypos = keylist; 94*60905Sbostic nelem = 0; 95*60905Sbostic crec = (RECHEADER *) buffer; 96*60905Sbostic while((c = get(binno, infiles, nfiles, crec, bufend, 97*60905Sbostic ftbl)) == 0) { 98*60905Sbostic *keypos++ = crec->data + depth; 99*60905Sbostic if (++nelem == MAXNUM) { 100*60905Sbostic c = BUFFEND; 101*60905Sbostic break; 102*60905Sbostic } 103*60905Sbostic crec =(RECHEADER *) ((char *) crec + 104*60905Sbostic SALIGN(crec->length) + sizeof(TRECHEADER)); 105*60905Sbostic } 106*60905Sbostic if (c == BUFFEND || ntfiles || mfct) { /* push */ 107*60905Sbostic if (panic >= PANIC) { 108*60905Sbostic fstack[MAXFCT-16+mfct].fd = ftmp(); 109*60905Sbostic if (radixsort(keylist, nelem, weights, 110*60905Sbostic REC_D)) 111*60905Sbostic err(2, NULL); 112*60905Sbostic append(keylist, nelem, depth, fstack[ 113*60905Sbostic MAXFCT-16+mfct].fd, putrec, ftbl); 114*60905Sbostic mfct++; 115*60905Sbostic /* reduce number of open files */ 116*60905Sbostic if (mfct == 16 ||(c == EOF && ntfiles)) { 117*60905Sbostic tmpbuf = malloc(bufend - 118*60905Sbostic crec->data); 119*60905Sbostic memmove(tmpbuf, crec->data, 120*60905Sbostic bufend - crec->data); 121*60905Sbostic fstack[tfiles.top + ntfiles].fd 122*60905Sbostic = ftmp(); 123*60905Sbostic fmerge(0, mstart, mfct, geteasy, 124*60905Sbostic fstack[tfiles.top+ntfiles].fd, 125*60905Sbostic putrec, ftbl); 126*60905Sbostic ++ntfiles; 127*60905Sbostic mfct = 0; 128*60905Sbostic memmove(crec->data, tmpbuf, 129*60905Sbostic bufend - crec->data); 130*60905Sbostic free(tmpbuf); 131*60905Sbostic } 132*60905Sbostic } else { 133*60905Sbostic fstack[tfiles.top + ntfiles].fd= ftmp(); 134*60905Sbostic onepass(keylist, depth, nelem, sizes, 135*60905Sbostic weights, fstack[tfiles.top+ntfiles].fd); 136*60905Sbostic ++ntfiles; 137*60905Sbostic } 138*60905Sbostic } 139*60905Sbostic } 140*60905Sbostic get = getnext; 141*60905Sbostic if (!ntfiles && !mfct) { /* everything in memory--pop */ 142*60905Sbostic if (nelem > 1) 143*60905Sbostic if (radixsort(keylist, nelem, weights, REC_D)) 144*60905Sbostic err(2, NULL); 145*60905Sbostic append(keylist, nelem, depth, outfd, putline, ftbl); 146*60905Sbostic break; /* pop */ 147*60905Sbostic } 148*60905Sbostic if (panic >= PANIC) { 149*60905Sbostic if (!ntfiles) 150*60905Sbostic fmerge(0, mstart, mfct, geteasy, 151*60905Sbostic outfd, putline, ftbl); 152*60905Sbostic else 153*60905Sbostic fmerge(0, tfiles, ntfiles, geteasy, 154*60905Sbostic outfd, putline, ftbl); 155*60905Sbostic break; 156*60905Sbostic 157*60905Sbostic } 158*60905Sbostic total = maxb = lastb = 0; /* find if one bin dominates */ 159*60905Sbostic for (i = 0; i < NBINS; i++) 160*60905Sbostic if (sizes[i]) { 161*60905Sbostic if (sizes[i] > sizes[maxb]) 162*60905Sbostic maxb = i; 163*60905Sbostic lastb = i; 164*60905Sbostic total += sizes[i]; 165*60905Sbostic } 166*60905Sbostic if (sizes[maxb] < max((total / 2) , BUFSIZE)) 167*60905Sbostic maxb = lastb; /* otherwise pop after last bin */ 168*60905Sbostic fstack[tfiles.top].lastb = lastb; 169*60905Sbostic fstack[tfiles.top].maxb = maxb; 170*60905Sbostic 171*60905Sbostic /* start refining next level. */ 172*60905Sbostic get(-1, tfiles, ntfiles, crec, bufend, 0); /* rewind */ 173*60905Sbostic for (i = 0; i < maxb; i++) { 174*60905Sbostic if (!sizes[i]) /* bin empty; step ahead file offset */ 175*60905Sbostic get(i, tfiles, ntfiles, crec, bufend, 0); 176*60905Sbostic else 177*60905Sbostic fsort(i, depth+1, tfiles, ntfiles, outfd, ftbl); 178*60905Sbostic } 179*60905Sbostic if (lastb != maxb) { 180*60905Sbostic if (prevfd != outfd) 181*60905Sbostic tailfd[panic] = prevfd; 182*60905Sbostic prevfd = ftmp(); 183*60905Sbostic for (i = maxb+1; i <= lastb; i++) 184*60905Sbostic if (!sizes[i]) 185*60905Sbostic get(i, tfiles, ntfiles, crec, bufend,0); 186*60905Sbostic else 187*60905Sbostic fsort(i, depth+1, tfiles, ntfiles, 188*60905Sbostic prevfd, ftbl); 189*60905Sbostic } 190*60905Sbostic 191*60905Sbostic /* sort biggest (or last) bin at this level */ 192*60905Sbostic depth++; 193*60905Sbostic panic++; 194*60905Sbostic binno = maxb; 195*60905Sbostic infiles.top = tfiles.top; /* getnext will free tfiles, */ 196*60905Sbostic nfiles = ntfiles; /* so overwrite them */ 197*60905Sbostic } 198*60905Sbostic if (prevfd != outfd) { 199*60905Sbostic concat(outfd, prevfd); 200*60905Sbostic fclose(prevfd); 201*60905Sbostic } 202*60905Sbostic for (i = panic; i >= 0; --i) 203*60905Sbostic if (tailfd[i]) { 204*60905Sbostic concat(outfd, tailfd[i]); 205*60905Sbostic fclose(tailfd[i]); 206*60905Sbostic } 207*60905Sbostic } 208*60905Sbostic 209*60905Sbostic /* 210*60905Sbostic This is one pass of radix exchange, dumping the bins to disk. 211*60905Sbostic */ 212*60905Sbostic #define swap(a, b, t) t = a, a = b, b = t 213*60905Sbostic void 214*60905Sbostic onepass(a, depth, n, sizes, tr, fd) 215*60905Sbostic u_char **a; 216*60905Sbostic int depth; 217*60905Sbostic long n, sizes[]; 218*60905Sbostic u_char *tr; 219*60905Sbostic FILE *fd; 220*60905Sbostic { 221*60905Sbostic long tsizes[NBINS+1]; 222*60905Sbostic u_char **bin[257], **top[256], ***bp, ***bpmax, ***tp; 223*60905Sbostic static histo[256]; 224*60905Sbostic int *hp; 225*60905Sbostic register int c; 226*60905Sbostic u_char **an, *t, **aj; 227*60905Sbostic register u_char **ak, *r; 228*60905Sbostic 229*60905Sbostic memset(tsizes, 0, sizeof(tsizes)); 230*60905Sbostic depth += sizeof(TRECHEADER); 231*60905Sbostic an = a + n; 232*60905Sbostic for (ak = a; ak < an; ak++) { 233*60905Sbostic histo[c = tr[**ak]]++; 234*60905Sbostic tsizes[c] += ((RECHEADER *) (*ak -= depth))->length; 235*60905Sbostic } 236*60905Sbostic 237*60905Sbostic bin[0] = a; 238*60905Sbostic bpmax = bin + 256; 239*60905Sbostic tp = top, hp = histo; 240*60905Sbostic for (bp = bin; bp < bpmax; bp++) { 241*60905Sbostic *tp++ = *(bp+1) = *bp + (c = *hp); 242*60905Sbostic *hp++ = 0; 243*60905Sbostic if (c <= 1) 244*60905Sbostic continue; 245*60905Sbostic } 246*60905Sbostic for(aj = a; aj < an; *aj = r, aj = bin[c+1]) 247*60905Sbostic for(r = *aj; aj < (ak = --top[c = tr[r[depth]]]) ;) 248*60905Sbostic swap(*ak, r, t); 249*60905Sbostic 250*60905Sbostic for (ak = a, c = 0; c < 256; c++) { 251*60905Sbostic an = bin[c+1]; 252*60905Sbostic n = an - ak; 253*60905Sbostic tsizes[c] += n * sizeof(TRECHEADER); 254*60905Sbostic /* tell getnext how many elements in this bin, this segment. */ 255*60905Sbostic EWRITE(tsizes+c, sizeof(long), 1, fd); 256*60905Sbostic sizes[c] += tsizes[c]; 257*60905Sbostic for (; ak < an; ++ak) 258*60905Sbostic putrec((RECHEADER *) *ak, fd); 259*60905Sbostic } 260*60905Sbostic } 261