1*56962Sbostic /*- 2*56962Sbostic * Copyright (c) 1992 The Regents of the University of California. 3*56962Sbostic * All rights reserved. 4*56962Sbostic * 5*56962Sbostic * This code is derived from software contributed to Berkeley by 6*56962Sbostic * Peter McIlroy. 7*56962Sbostic * 8*56962Sbostic * %sccs.include.redist.c% 9*56962Sbostic */ 10*56962Sbostic 11*56962Sbostic #if defined(LIBC_SCCS) && !defined(lint) 12*56962Sbostic static char sccsid[] = "@(#)merge.c 5.1 (Berkeley) 12/02/92"; 13*56962Sbostic #endif /* LIBC_SCCS and not lint */ 14*56962Sbostic 15*56962Sbostic /* 16*56962Sbostic * Hybrid exponential search/linear search merge sort with hybrid 17*56962Sbostic * natural/pairwise first pass. Requires about .3% more comparisons 18*56962Sbostic * for random data than LSMS with pairwise first pass alone. 19*56962Sbostic * It works for objects as small as two bytes. 20*56962Sbostic */ 21*56962Sbostic 22*56962Sbostic #define NATURAL 23*56962Sbostic #define THRESHOLD 16 /* Best choice for natural merge cut-off. */ 24*56962Sbostic 25*56962Sbostic /* #define NATURAL to get hybrid natural merge. 26*56962Sbostic * (The default is pairwise merging.) 27*56962Sbostic */ 28*56962Sbostic 29*56962Sbostic #include <sys/types.h> 30*56962Sbostic 31*56962Sbostic #include <errno.h> 32*56962Sbostic #include <stdlib.h> 33*56962Sbostic #include <string.h> 34*56962Sbostic 35*56962Sbostic static void setup __P((u_char *, u_char *, size_t, size_t, int (*)())); 36*56962Sbostic static void insertionsort __P((u_char *, size_t, size_t, int (*)())); 37*56962Sbostic 38*56962Sbostic #define ISIZE sizeof(int) 39*56962Sbostic #define PSIZE sizeof(u_char *) 40*56962Sbostic #define ICOPY_LIST(src, dst, last) \ 41*56962Sbostic do \ 42*56962Sbostic *(int*)dst = *(int*)src, src += ISIZE, dst += ISIZE; \ 43*56962Sbostic while(src < last) 44*56962Sbostic #define ICOPY_ELT(src, dst, i) \ 45*56962Sbostic do \ 46*56962Sbostic *(int*) dst = *(int*) src, src += ISIZE, dst += ISIZE; \ 47*56962Sbostic while (i -= ISIZE) 48*56962Sbostic 49*56962Sbostic #define CCOPY_LIST(src, dst, last) \ 50*56962Sbostic do \ 51*56962Sbostic *dst++ = *src++; \ 52*56962Sbostic while (src < last) 53*56962Sbostic #define CCOPY_ELT(src, dst, i) \ 54*56962Sbostic do \ 55*56962Sbostic *dst++ = *src++; \ 56*56962Sbostic while (i -= 1) 57*56962Sbostic 58*56962Sbostic /* 59*56962Sbostic * Find the next possible pointer head. (Trickery for forcing an array 60*56962Sbostic * to do double duty as a linked list when objects do not align with word 61*56962Sbostic * boundaries. 62*56962Sbostic */ 63*56962Sbostic /* Assumption: PSIZE is a power of 2. */ 64*56962Sbostic #define EVAL(p) (u_char **) \ 65*56962Sbostic ((u_char *)0 + \ 66*56962Sbostic (((u_char *)p + PSIZE - 1 - (u_char *) 0) & ~(PSIZE - 1))) 67*56962Sbostic 68*56962Sbostic /* 69*56962Sbostic * Arguments are as for qsort. 70*56962Sbostic */ 71*56962Sbostic int 72*56962Sbostic mergesort(base, nmemb, size, cmp) 73*56962Sbostic void *base; 74*56962Sbostic size_t nmemb; 75*56962Sbostic register size_t size; 76*56962Sbostic int (*cmp) __P((const void *, const void *)); 77*56962Sbostic { 78*56962Sbostic register int i, sense; 79*56962Sbostic int big, iflag; 80*56962Sbostic register u_char *f1, *f2, *t, *b, *tp2, *q, *l1, *l2; 81*56962Sbostic u_char *list2, *list1, *p2, *p, *last, **p1; 82*56962Sbostic 83*56962Sbostic if (size < PSIZE/2) { /* pointers must fit into 2*size */ 84*56962Sbostic errno = EINVAL; 85*56962Sbostic return (-1); 86*56962Sbostic } 87*56962Sbostic 88*56962Sbostic /* 89*56962Sbostic * XXX 90*56962Sbostic * Stupid subtraction for the Cray. 91*56962Sbostic */ 92*56962Sbostic iflag = 0; 93*56962Sbostic if (!(size % ISIZE) && !(((char *)base - (char *)0) % ISIZE)) 94*56962Sbostic iflag = 1; 95*56962Sbostic 96*56962Sbostic if ((list2 = malloc(nmemb * size + PSIZE)) == NULL) 97*56962Sbostic return (-1); 98*56962Sbostic 99*56962Sbostic list1 = base; 100*56962Sbostic setup(list1, list2, nmemb, size, cmp); 101*56962Sbostic last = list2 + nmemb * size; 102*56962Sbostic i = big = 0; 103*56962Sbostic while (*EVAL(list2) != last) { 104*56962Sbostic l2 = list1; 105*56962Sbostic p1 = EVAL(list1); 106*56962Sbostic for (tp2 = p2 = list2; p2 != last; p1 = EVAL(l2)) { 107*56962Sbostic p2 = *EVAL(p2); 108*56962Sbostic f1 = l2; 109*56962Sbostic f2 = l1 = list1 + (p2 - list2); 110*56962Sbostic if (p2 != last) 111*56962Sbostic p2 = *EVAL(p2); 112*56962Sbostic l2 = list1 + (p2 - list2); 113*56962Sbostic while (f1 < l1 && f2 < l2) { 114*56962Sbostic if ((*cmp)(f1, f2) <= 0) { 115*56962Sbostic q = f2; 116*56962Sbostic b = f1, t = l1; 117*56962Sbostic sense = -1; 118*56962Sbostic } else { 119*56962Sbostic q = f1; 120*56962Sbostic b = f2, t = l2; 121*56962Sbostic sense = 0; 122*56962Sbostic } 123*56962Sbostic if (!big) { /* here i = 0 */ 124*56962Sbostic LINEAR: while ((b += size) < t && cmp(q, b) >sense) 125*56962Sbostic if (++i == 6) { 126*56962Sbostic big = 1; 127*56962Sbostic goto EXPONENTIAL; 128*56962Sbostic } 129*56962Sbostic } else { 130*56962Sbostic EXPONENTIAL: for (i = size; ; i <<= 1) 131*56962Sbostic if ((p = (b + i)) >= t) { 132*56962Sbostic if ((p = t - size) > b && 133*56962Sbostic (*cmp)(q, p) <= sense) 134*56962Sbostic t = p; 135*56962Sbostic else 136*56962Sbostic b = p; 137*56962Sbostic break; 138*56962Sbostic } else if ((*cmp)(q, p) <= sense) { 139*56962Sbostic t = p; 140*56962Sbostic if (i == size) 141*56962Sbostic big = 0; 142*56962Sbostic goto FASTCASE; 143*56962Sbostic } else 144*56962Sbostic b = p; 145*56962Sbostic SLOWCASE: while (t > b+size) { 146*56962Sbostic i = (((t - b) / size) >> 1) * size; 147*56962Sbostic if ((*cmp)(q, p = b + i) <= sense) 148*56962Sbostic t = p; 149*56962Sbostic else 150*56962Sbostic b = p; 151*56962Sbostic } 152*56962Sbostic goto COPY; 153*56962Sbostic FASTCASE: while (i > size) 154*56962Sbostic if ((*cmp)(q, 155*56962Sbostic p = b + (i >>= 1)) <= sense) 156*56962Sbostic t = p; 157*56962Sbostic else 158*56962Sbostic b = p; 159*56962Sbostic COPY: b = t; 160*56962Sbostic } 161*56962Sbostic i = size; 162*56962Sbostic if (q == f1) { 163*56962Sbostic if (iflag) { 164*56962Sbostic ICOPY_LIST(f2, tp2, b); 165*56962Sbostic ICOPY_ELT(f1, tp2, i); 166*56962Sbostic } else { 167*56962Sbostic CCOPY_LIST(f2, tp2, b); 168*56962Sbostic CCOPY_ELT(f1, tp2, i); 169*56962Sbostic } 170*56962Sbostic } else { 171*56962Sbostic if (iflag) { 172*56962Sbostic ICOPY_LIST(f1, tp2, b); 173*56962Sbostic ICOPY_ELT(f2, tp2, i); 174*56962Sbostic } else { 175*56962Sbostic CCOPY_LIST(f1, tp2, b); 176*56962Sbostic CCOPY_ELT(f2, tp2, i); 177*56962Sbostic } 178*56962Sbostic } 179*56962Sbostic } 180*56962Sbostic if (f2 < l2) { 181*56962Sbostic if (iflag) 182*56962Sbostic ICOPY_LIST(f2, tp2, l2); 183*56962Sbostic else 184*56962Sbostic CCOPY_LIST(f2, tp2, l2); 185*56962Sbostic } else if (f1 < l1) { 186*56962Sbostic if (iflag) 187*56962Sbostic ICOPY_LIST(f1, tp2, l1); 188*56962Sbostic else 189*56962Sbostic CCOPY_LIST(f1, tp2, l1); 190*56962Sbostic } 191*56962Sbostic *p1 = l2; 192*56962Sbostic } 193*56962Sbostic tp2 = list1; /* swap list1, list2 */ 194*56962Sbostic list1 = list2; 195*56962Sbostic list2 = tp2; 196*56962Sbostic last = list2 + nmemb*size; 197*56962Sbostic } 198*56962Sbostic if (base == list2) { 199*56962Sbostic memmove(list2, list1, nmemb*size); 200*56962Sbostic list2 = list1; 201*56962Sbostic } 202*56962Sbostic free(list2); 203*56962Sbostic return (0); 204*56962Sbostic } 205*56962Sbostic 206*56962Sbostic #define swap(a, b) { \ 207*56962Sbostic s = b; \ 208*56962Sbostic i = size; \ 209*56962Sbostic do { \ 210*56962Sbostic tmp = *a; *a++ = *s; *s++ = tmp; \ 211*56962Sbostic } while (--i); \ 212*56962Sbostic a -= size; \ 213*56962Sbostic } 214*56962Sbostic #define reverse(bot, top) { \ 215*56962Sbostic s = top; \ 216*56962Sbostic do { \ 217*56962Sbostic i = size; \ 218*56962Sbostic do { \ 219*56962Sbostic tmp = *bot; *bot++ = *s; *s++ = tmp; \ 220*56962Sbostic } while (--i); \ 221*56962Sbostic s -= size2; \ 222*56962Sbostic } while(bot < s); \ 223*56962Sbostic } 224*56962Sbostic 225*56962Sbostic /* 226*56962Sbostic * Optional hybrid natural/pairwise first pass. Eats up list1 in runs of 227*56962Sbostic * increasing order, list2 in a corresponding linked list. Checks for runs 228*56962Sbostic * when THRESHOLD/2 pairs compare with same sense. (Only used when NATURAL 229*56962Sbostic * is defined. Otherwise simple pairwise merging is used.) 230*56962Sbostic */ 231*56962Sbostic void 232*56962Sbostic setup(list1, list2, n, size, cmp) 233*56962Sbostic size_t n, size; 234*56962Sbostic int (*cmp) __P((const void *, const void *)); 235*56962Sbostic u_char *list1, *list2; 236*56962Sbostic { 237*56962Sbostic int i, length, size2, tmp, sense; 238*56962Sbostic u_char *f1, *f2, *s, *l2, *last, *p2; 239*56962Sbostic 240*56962Sbostic size2 = size*2; 241*56962Sbostic if (n <= 5) { 242*56962Sbostic insertionsort(list1, n, size, cmp); 243*56962Sbostic *EVAL(list2) = (u_char*) list2 + n*size; 244*56962Sbostic return; 245*56962Sbostic } 246*56962Sbostic /* 247*56962Sbostic * Avoid running pointers out of bounds; limit n to evens 248*56962Sbostic * for simplicity. 249*56962Sbostic */ 250*56962Sbostic i = 4 + (n & 1); 251*56962Sbostic insertionsort(list1 + (n - i) * size, i, size, cmp); 252*56962Sbostic last = list1 + size * (n - i); 253*56962Sbostic *EVAL(list2 + (last - list1)) = list2 + n * size; 254*56962Sbostic 255*56962Sbostic #ifdef NATURAL 256*56962Sbostic p2 = list2; 257*56962Sbostic f1 = list1; 258*56962Sbostic sense = (cmp(f1, f1 + size) > 0); 259*56962Sbostic for (; f1 < last; sense = !sense) { 260*56962Sbostic length = 2; 261*56962Sbostic /* Find pairs with same sense. */ 262*56962Sbostic for (f2 = f1 + size2; f2 < last; f2 += size2) { 263*56962Sbostic if ((cmp(f2, f2+ size) > 0) != sense) 264*56962Sbostic break; 265*56962Sbostic length += 2; 266*56962Sbostic } 267*56962Sbostic if (length < THRESHOLD) { /* Pairwise merge */ 268*56962Sbostic do { 269*56962Sbostic p2 = *EVAL(p2) = f1 + size2 - list1 + list2; 270*56962Sbostic if (sense > 0) 271*56962Sbostic swap (f1, f1 + size); 272*56962Sbostic } while ((f1 += size2) < f2); 273*56962Sbostic } else { /* Natural merge */ 274*56962Sbostic l2 = f2; 275*56962Sbostic for (f2 = f1 + size2; f2 < l2; f2 += size2) { 276*56962Sbostic if ((cmp(f2-size, f2) > 0) != sense) { 277*56962Sbostic p2 = *EVAL(p2) = f2 - list1 + list2; 278*56962Sbostic if (sense > 0) 279*56962Sbostic reverse(f1, f2-size); 280*56962Sbostic f1 = f2; 281*56962Sbostic } 282*56962Sbostic } 283*56962Sbostic if (sense > 0) 284*56962Sbostic reverse (f1, f2-size); 285*56962Sbostic f1 = f2; 286*56962Sbostic if (f2 < last || cmp(f2, f2 + size) > 0) 287*56962Sbostic p2 = *EVAL(p2) = f2 - list1 + list2; 288*56962Sbostic else 289*56962Sbostic p2 = *EVAL(p2) = list2 + n*size; 290*56962Sbostic } 291*56962Sbostic } 292*56962Sbostic #else /* pairwise merge only. */ 293*56962Sbostic for (f1 = list1, p2 = list2; f1 < last; f1 += size2) { 294*56962Sbostic p2 = *EVAL(p2) = p2 + size2; 295*56962Sbostic if (cmp (f1, f1 + size) > 0) 296*56962Sbostic swap(f1, f1 + size); 297*56962Sbostic } 298*56962Sbostic #endif /* NATURAL */ 299*56962Sbostic } 300*56962Sbostic 301*56962Sbostic /* 302*56962Sbostic * This is to avoid out-of-bounds addresses in sorting the 303*56962Sbostic * last 4 elements. 304*56962Sbostic */ 305*56962Sbostic static void 306*56962Sbostic insertionsort(a, n, size, cmp) 307*56962Sbostic u_char *a; 308*56962Sbostic size_t n, size; 309*56962Sbostic int (*cmp) __P((const void *, const void *)); 310*56962Sbostic { 311*56962Sbostic u_char *ai, *s, *t, *u, tmp; 312*56962Sbostic int i; 313*56962Sbostic 314*56962Sbostic for (ai = a+size; --n >= 1; ai += size) 315*56962Sbostic for (t = ai; t > a; t -= size) { 316*56962Sbostic u = t - size; 317*56962Sbostic if (cmp(u, t) <= 0) 318*56962Sbostic break; 319*56962Sbostic swap(u, t); 320*56962Sbostic } 321*56962Sbostic } 322