160908Sbostic /*-
2*62247Sbostic * Copyright (c) 1993
3*62247Sbostic * The Regents of the University of California. All rights reserved.
460908Sbostic *
560908Sbostic * This code is derived from software contributed to Berkeley by
660908Sbostic * Peter McIlroy.
760908Sbostic *
860908Sbostic * %sccs.include.redist.c%
960908Sbostic */
1060908Sbostic
1160908Sbostic #ifndef lint
12*62247Sbostic static char sccsid[] = "@(#)msort.c 8.1 (Berkeley) 06/06/93";
1360908Sbostic #endif /* not lint */
1460908Sbostic
1560908Sbostic #include "sort.h"
1660908Sbostic #include "fsort.h"
1760908Sbostic
1860908Sbostic #include <stdlib.h>
1960908Sbostic #include <string.h>
2060908Sbostic #include <unistd.h>
2160908Sbostic
2260908Sbostic /* Subroutines using comparisons: merge sort and check order */
2360908Sbostic #define DELETE (1)
2460908Sbostic #define LALIGN(n) ((n+3) & ~3)
2560908Sbostic
2660908Sbostic typedef struct mfile {
2760908Sbostic u_char *end;
2860908Sbostic short flno;
2960908Sbostic struct recheader rec[1];
3060908Sbostic } MFILE;
3160908Sbostic typedef struct tmfile {
3260908Sbostic u_char *end;
3360908Sbostic short flno;
3460908Sbostic struct trecheader rec[1];
3560908Sbostic } TMFILE;
3660908Sbostic u_char *wts, *wts1 = 0;
3760908Sbostic struct mfile *cfilebuf;
3860908Sbostic
3960908Sbostic static int cmp __P((struct recheader *, struct recheader *));
4060908Sbostic static int insert __P((struct mfile **, struct mfile **, int, int));
4160908Sbostic
4260908Sbostic void
fmerge(binno,files,nfiles,get,outfd,fput,ftbl)4360908Sbostic fmerge(binno, files, nfiles, get, outfd, fput, ftbl)
4460908Sbostic union f_handle files;
4560908Sbostic int binno, nfiles;
4660908Sbostic int (*get)();
4760908Sbostic FILE *outfd;
4860908Sbostic void (*fput)();
4960908Sbostic struct field *ftbl;
5060908Sbostic {
5160908Sbostic FILE *tout;
5260908Sbostic int i, j, last;
5360908Sbostic void (*put)(struct recheader *, FILE *);
5460908Sbostic extern int geteasy();
5560908Sbostic struct tempfile *l_fstack;
5660908Sbostic
5760908Sbostic wts = ftbl->weights;
5860908Sbostic if (!UNIQUE && SINGL_FLD && ftbl->flags & F)
5960908Sbostic wts1 = (ftbl->flags & R) ? Rascii : ascii;
6060908Sbostic if (!cfilebuf)
6160908Sbostic cfilebuf = malloc(MAXLLEN + sizeof(TMFILE));
6260908Sbostic
6360908Sbostic i = min(16, nfiles) * LALIGN(MAXLLEN+sizeof(TMFILE));
6460908Sbostic if (!buffer || i > BUFSIZE) {
6560908Sbostic buffer = buffer ? realloc(buffer, i) : malloc(i);
6660908Sbostic if (!buffer)
6760908Sbostic err(2, NULL);
6860908Sbostic if (!SINGL_FLD)
6960908Sbostic linebuf = malloc(MAXLLEN);
7060908Sbostic }
7160908Sbostic
7260908Sbostic if (binno >= 0)
7360908Sbostic l_fstack = fstack + files.top;
7460908Sbostic else
7560908Sbostic l_fstack = fstack;
7660908Sbostic while (nfiles) {
7760908Sbostic put = putrec;
7860908Sbostic for (j = 0; j < nfiles; j += 16) {
7960908Sbostic if (nfiles <= 16) {
8060908Sbostic tout = outfd;
8160908Sbostic put = fput;
8260908Sbostic }
8360908Sbostic else
8460908Sbostic tout = ftmp();
8560908Sbostic last = min(16, nfiles - j);
8660908Sbostic if (binno < 0) {
8760908Sbostic for (i = 0; i < last; i++)
8860908Sbostic if (!(l_fstack[i+MAXFCT-1-16].fd =
8960908Sbostic fopen(files.names[j + i], "r")))
9060908Sbostic err(2, "%s", files.names[j+i]);
9160908Sbostic merge(MAXFCT-1-16, last, get, tout, put, ftbl);
9260908Sbostic }
9360908Sbostic else {
9460908Sbostic for (i = 0; i< last; i++)
9560908Sbostic rewind(l_fstack[i+j].fd);
9660908Sbostic merge(files.top+j, last, get, tout, put, ftbl);
9760908Sbostic }
9860908Sbostic if (nfiles > 16) l_fstack[j/16].fd = tout;
9960908Sbostic }
10060908Sbostic nfiles = (nfiles + 15) / 16;
10160908Sbostic if (nfiles == 1)
10260908Sbostic nfiles = 0;
10360908Sbostic if (binno < 0) {
10460908Sbostic binno = 0;
10560908Sbostic get = geteasy;
10660908Sbostic files.top = 0;
10760908Sbostic }
10860908Sbostic }
10960908Sbostic }
11060908Sbostic
11160908Sbostic void
merge(infl0,nfiles,get,outfd,put,ftbl)11260908Sbostic merge(infl0, nfiles, get, outfd, put, ftbl)
11360908Sbostic int infl0, nfiles;
11460908Sbostic int (*get)();
11560908Sbostic void (*put)(struct recheader *, FILE *);
11660908Sbostic FILE *outfd;
11760908Sbostic struct field *ftbl;
11860908Sbostic {
11960908Sbostic int c, i, j;
12060908Sbostic union f_handle dummy = {0};
12160908Sbostic struct mfile *flist[16], *cfile;
12260908Sbostic for (i = j = 0; i < nfiles; i++) {
12360908Sbostic cfile = (MFILE *) (buffer +
12460908Sbostic i * LALIGN(MAXLLEN + sizeof(TMFILE)));
12560908Sbostic cfile->flno = j + infl0;
12660908Sbostic cfile->end = cfile->rec->data + MAXLLEN;
12760908Sbostic for (c = 1; c == 1;) {
12860908Sbostic if (EOF == (c = get(j+infl0, dummy, nfiles,
12960908Sbostic cfile->rec, cfile->end, ftbl))) {
13060908Sbostic --i;
13160908Sbostic --nfiles;
13260908Sbostic break;
13360908Sbostic }
13460908Sbostic if (i)
13560908Sbostic c = insert(flist, &cfile, i, !DELETE);
13660908Sbostic else
13760908Sbostic flist[0] = cfile;
13860908Sbostic }
13960908Sbostic j++;
14060908Sbostic }
14160908Sbostic cfile = cfilebuf;
14260908Sbostic cfile->flno = flist[0]->flno;
14360908Sbostic cfile->end = cfile->rec->data + MAXLLEN;
14460908Sbostic while (nfiles) {
14560908Sbostic for (c = 1; c == 1;) {
14660908Sbostic if (EOF == (c = get(cfile->flno, dummy, nfiles,
14760908Sbostic cfile->rec, cfile->end, ftbl))) {
14860908Sbostic put(flist[0]->rec, outfd);
14960908Sbostic memmove(flist, flist + 1,
15060908Sbostic sizeof(MFILE *) * (--nfiles));
15160908Sbostic cfile->flno = flist[0]->flno;
15260908Sbostic break;
15360908Sbostic }
15460908Sbostic if (!(c = insert(flist, &cfile, nfiles, DELETE)))
15560908Sbostic put(cfile->rec, outfd);
15660908Sbostic }
15760908Sbostic }
15860908Sbostic }
15960908Sbostic
16060908Sbostic /*
16160908Sbostic * if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec;
16260908Sbostic * otherwise just inserts *rec in flist.
16360908Sbostic */
16460908Sbostic static int
insert(flist,rec,ttop,delete)16560908Sbostic insert(flist, rec, ttop, delete)
16660908Sbostic struct mfile **flist, **rec;
16760908Sbostic int delete, ttop; /* delete = 0 or 1 */
16860908Sbostic {
16960908Sbostic register struct mfile *tmprec;
17060908Sbostic register int top, mid, bot = 0, cmpv = 1;
17160908Sbostic tmprec = *rec;
17260908Sbostic top = ttop;
17360908Sbostic for (mid = top/2; bot +1 != top; mid = (bot+top)/2) {
17460908Sbostic cmpv = cmp(tmprec->rec, flist[mid]->rec);
17560908Sbostic if (cmpv < 0)
17660908Sbostic top = mid;
17760908Sbostic else if (cmpv > 0)
17860908Sbostic bot = mid;
17960908Sbostic else {
18060908Sbostic if (!UNIQUE)
18160908Sbostic bot = mid - 1;
18260908Sbostic break;
18360908Sbostic }
18460908Sbostic }
18560908Sbostic if (delete) {
18660908Sbostic if (UNIQUE) {
18760908Sbostic if (!bot && cmpv)
18860908Sbostic cmpv = cmp(tmprec->rec, flist[0]->rec);
18960908Sbostic if (!cmpv)
19060908Sbostic return(1);
19160908Sbostic }
19260908Sbostic tmprec = flist[0];
19360908Sbostic if (bot)
19460908Sbostic memmove(flist, flist+1, bot * sizeof(MFILE **));
19560908Sbostic flist[bot] = *rec;
19660908Sbostic *rec = tmprec;
19760908Sbostic (*rec)->flno = (*flist)->flno;
19860908Sbostic return (0);
19960908Sbostic }
20060908Sbostic else {
20160908Sbostic if (!bot && !(UNIQUE && !cmpv)) {
20260908Sbostic cmpv = cmp(tmprec->rec, flist[0]->rec);
20360908Sbostic if (cmpv < 0)
20460908Sbostic bot = -1;
20560908Sbostic }
20660908Sbostic if (UNIQUE && !cmpv)
20760908Sbostic return (1);
20860908Sbostic bot++;
20960908Sbostic memmove(flist + bot+1, flist + bot,
21060908Sbostic (ttop - bot) * sizeof(MFILE **));
21160908Sbostic flist[bot] = *rec;
21260908Sbostic return (0);
21360908Sbostic }
21460908Sbostic }
21560908Sbostic
21660908Sbostic /*
21760908Sbostic * check order on one file
21860908Sbostic */
21960908Sbostic void
order(infile,get,ftbl)22060908Sbostic order(infile, get, ftbl)
22160908Sbostic union f_handle infile;
22260908Sbostic int (*get)();
22360908Sbostic struct field *ftbl;
22460908Sbostic {
22560908Sbostic u_char *end;
22660908Sbostic int c;
22760908Sbostic struct recheader *crec, *prec, *trec;
22860908Sbostic
22960908Sbostic if (!SINGL_FLD)
23060908Sbostic linebuf = malloc(MAXLLEN);
23160908Sbostic buffer = malloc(2 * (MAXLLEN + sizeof(TRECHEADER)));
23260908Sbostic end = buffer + 2 * (MAXLLEN + sizeof(TRECHEADER));
23360908Sbostic crec = (RECHEADER *) buffer;
23460908Sbostic prec = (RECHEADER *) (buffer + MAXLLEN + sizeof(TRECHEADER));
23560908Sbostic wts = ftbl->weights;
23660908Sbostic if (SINGL_FLD && ftbl->flags & F)
23760908Sbostic wts1 = ftbl->flags & R ? Rascii : ascii;
23860908Sbostic else
23960908Sbostic wts1 = 0;
24060908Sbostic if (0 == get(-1, infile, 1, prec, end, ftbl))
24160908Sbostic while (0 == get(-1, infile, 1, crec, end, ftbl)) {
24260908Sbostic if (0 < (c = cmp(prec, crec))) {
24360908Sbostic crec->data[crec->length-1] = 0;
24460908Sbostic errx(1, "found disorder: %s", crec->data+crec->offset);
24560908Sbostic }
24660908Sbostic if (UNIQUE && !c) {
24760908Sbostic crec->data[crec->length-1] = 0;
24860908Sbostic errx(1, "found non-uniqueness: %s",
24960908Sbostic crec->data+crec->offset);
25060908Sbostic }
25160908Sbostic trec = prec;
25260908Sbostic prec = crec;
25360908Sbostic crec = trec;
25460908Sbostic }
25560908Sbostic exit(0);
25660908Sbostic }
25760908Sbostic
25860908Sbostic static int
cmp(rec1,rec2)25960908Sbostic cmp(rec1, rec2)
26060908Sbostic struct recheader *rec1, *rec2;
26160908Sbostic {
26260908Sbostic register r;
26360908Sbostic register u_char *pos1, *pos2, *end;
26460908Sbostic register u_char *cwts;
26560908Sbostic for (cwts = wts; cwts; cwts = (cwts == wts1 ? 0 : wts1)) {
26660908Sbostic pos1 = rec1->data;
26760908Sbostic pos2 = rec2->data;
26860908Sbostic if (!SINGL_FLD && UNIQUE)
26960908Sbostic end = pos1 + min(rec1->offset, rec2->offset);
27060908Sbostic else
27160908Sbostic end = pos1 + min(rec1->length, rec2->length);
27260908Sbostic for (; pos1 < end; ) {
27360908Sbostic if (r = cwts[*pos1++] - cwts[*pos2++])
27460908Sbostic return (r);
27560908Sbostic }
27660908Sbostic }
27760908Sbostic return (0);
27860908Sbostic }
279