xref: /onnv-gate/usr/src/common/util/qsort.c (revision 6812:febeba71273d)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*6812Sraf  * Common Development and Distribution License (the "License").
6*6812Sraf  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
211219Sraf 
220Sstevel@tonic-gate /*
23*6812Sraf  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate #if !defined(_KERNEL) && !defined(_KMDB)
30*6812Sraf #include "lint.h"
310Sstevel@tonic-gate #endif /* !_KERNEL && !_KMDB */
320Sstevel@tonic-gate 
330Sstevel@tonic-gate #include <sys/types.h>
340Sstevel@tonic-gate 
350Sstevel@tonic-gate #if !defined(_KERNEL) && !defined(_KMDB)
360Sstevel@tonic-gate #include <stdlib.h>
370Sstevel@tonic-gate #include <synch.h>
380Sstevel@tonic-gate #endif /* !_KERNEL && !_KMDB */
390Sstevel@tonic-gate 
400Sstevel@tonic-gate #include "qsort.h"
410Sstevel@tonic-gate 
420Sstevel@tonic-gate static void swapp32(uint32_t *r1, uint32_t *r2, size_t cnt);
430Sstevel@tonic-gate static void swapp64(uint64_t *r1, uint64_t *r2, size_t cnt);
440Sstevel@tonic-gate static void swapi(uint32_t *r1, uint32_t *r2, size_t cnt);
450Sstevel@tonic-gate static void swapb(char *r1, char *r2, size_t cnt);
460Sstevel@tonic-gate 
470Sstevel@tonic-gate /*
480Sstevel@tonic-gate  * choose a median of 3 values
490Sstevel@tonic-gate  *
500Sstevel@tonic-gate  * note: cstyle specifically prohibits nested conditional operators
510Sstevel@tonic-gate  * but this is the only way to do the median of 3 function in-line
520Sstevel@tonic-gate  */
530Sstevel@tonic-gate #define	med3(a, b, c) (cmp((a), (b)) < 0) \
540Sstevel@tonic-gate 	? ((cmp((b), (c)) < 0) ? (b) : (cmp((a), (c)) < 0) ? (c) : (a)) \
550Sstevel@tonic-gate 	: ((cmp((b), (c)) > 0) ? (b) : (cmp((a), (c)) > 0) ? (c) : (a))
560Sstevel@tonic-gate 
570Sstevel@tonic-gate #define	THRESH_L	5	/* threshold for insertion sort */
580Sstevel@tonic-gate #define	THRESH_M3	20	/* threshold for median of 3 */
590Sstevel@tonic-gate #define	THRESH_M9	50	/* threshold for median of 9 */
600Sstevel@tonic-gate 
610Sstevel@tonic-gate typedef struct {
620Sstevel@tonic-gate 	char	*b_lim;
630Sstevel@tonic-gate 	size_t	nrec;
640Sstevel@tonic-gate } stk_t;
650Sstevel@tonic-gate 
660Sstevel@tonic-gate /*
670Sstevel@tonic-gate  * qsort() is a general purpose, in-place sorting routine using a
680Sstevel@tonic-gate  * user provided call back function for comparisons.  This implementation
690Sstevel@tonic-gate  * utilizes a ternary quicksort algorithm, and cuts over to an
700Sstevel@tonic-gate  * insertion sort for partitions involving fewer than THRESH_L records.
710Sstevel@tonic-gate  *
720Sstevel@tonic-gate  * Potential User Errors
730Sstevel@tonic-gate  *   There is no return value from qsort, this function has no method
740Sstevel@tonic-gate  *   of alerting the user that a sort did not work or could not work.
750Sstevel@tonic-gate  *   We do not print an error message or exit the process or thread,
760Sstevel@tonic-gate  *   Even if we can detect an error, We CANNOT silently return without
770Sstevel@tonic-gate  *   sorting the data, if we did so the user could never be sure the
780Sstevel@tonic-gate  *   sort completed successfully.
790Sstevel@tonic-gate  *   It is possible we could change the return value of sort from void
800Sstevel@tonic-gate  *   to int and return success or some error codes, but this gets into
810Sstevel@tonic-gate  *   standards  and compatibility issues.
820Sstevel@tonic-gate  *
830Sstevel@tonic-gate  *   Examples of qsort parameter errors might be
840Sstevel@tonic-gate  *   1) record size (rsiz) equal to 0
850Sstevel@tonic-gate  *      qsort will loop and never return.
860Sstevel@tonic-gate  *   2) record size (rsiz) less than 0
870Sstevel@tonic-gate  *      rsiz is unsigned, so a negative value is insanely large
880Sstevel@tonic-gate  *   3) number of records (nrec) is 0
890Sstevel@tonic-gate  *      This is legal - qsort will return without examining any records
900Sstevel@tonic-gate  *   4) number of records (nrec) is less than 0
910Sstevel@tonic-gate  *      nrec is unsigned, so a negative value is insanely large.
920Sstevel@tonic-gate  *   5) nrec * rsiz > memory allocation for sort array
930Sstevel@tonic-gate  *      a segment violation may occur
940Sstevel@tonic-gate  *      corruption of other memory may occur
950Sstevel@tonic-gate  *   6) The base address of the sort array is invalid
960Sstevel@tonic-gate  *      a segment violation may occur
970Sstevel@tonic-gate  *      corruption of other memory may occur
980Sstevel@tonic-gate  *   7) The user call back function is invalid
990Sstevel@tonic-gate  *      we may get alignment errors or segment violations
1000Sstevel@tonic-gate  *      we may jump into never-never land
1010Sstevel@tonic-gate  *
1020Sstevel@tonic-gate  *   Some less obvious errors might be
1030Sstevel@tonic-gate  *   8) The user compare function is not comparing correctly
1040Sstevel@tonic-gate  *   9) The user compare function modifies the data records
1050Sstevel@tonic-gate  */
1060Sstevel@tonic-gate 
1070Sstevel@tonic-gate void
qsort(void * basep,size_t nrec,size_t rsiz,int (* cmp)(const void *,const void *))1080Sstevel@tonic-gate qsort(
1090Sstevel@tonic-gate 	void		*basep,
1100Sstevel@tonic-gate 	size_t		nrec,
1110Sstevel@tonic-gate 	size_t		rsiz,
1120Sstevel@tonic-gate 	int		(*cmp)(const void *, const void *))
1130Sstevel@tonic-gate {
1140Sstevel@tonic-gate 	size_t		i;		/* temporary variable */
1150Sstevel@tonic-gate 
1160Sstevel@tonic-gate 	/* variables used by swap */
1170Sstevel@tonic-gate 	void		(*swapf)(char *, char *, size_t);
1180Sstevel@tonic-gate 	size_t		loops;
1190Sstevel@tonic-gate 
1200Sstevel@tonic-gate 	/* variables used by sort */
1210Sstevel@tonic-gate 	stk_t		stack[8 * sizeof (nrec) + 1];
1220Sstevel@tonic-gate 	stk_t		*sp;
1230Sstevel@tonic-gate 	char		*b_lim;		/* bottom limit */
1240Sstevel@tonic-gate 	char		*b_dup;		/* bottom duplicate */
1250Sstevel@tonic-gate 	char		*b_par;		/* bottom partition */
1260Sstevel@tonic-gate 	char		*t_lim;		/* top limit */
1270Sstevel@tonic-gate 	char		*t_dup;		/* top duplicate */
1280Sstevel@tonic-gate 	char		*t_par;		/* top partition */
1290Sstevel@tonic-gate 	char		*m1, *m2, *m3;	/* median pointers */
1300Sstevel@tonic-gate 	uintptr_t	d_bytelength;	/* byte length of duplicate records */
1310Sstevel@tonic-gate 	int		b_nrec;
1320Sstevel@tonic-gate 	int		t_nrec;
1330Sstevel@tonic-gate 	int		cv;		/* results of compare (bottom / top) */
1340Sstevel@tonic-gate 
1350Sstevel@tonic-gate 	/*
1360Sstevel@tonic-gate 	 * choose a swap function based on alignment and size
1370Sstevel@tonic-gate 	 *
1380Sstevel@tonic-gate 	 * The qsort function sorts an array of fixed length records.
1390Sstevel@tonic-gate 	 * We have very limited knowledge about the data record itself.
1400Sstevel@tonic-gate 	 * It may be that the data record is in the array we are sorting
1410Sstevel@tonic-gate 	 * or it may be that the array contains pointers or indexes to
1420Sstevel@tonic-gate 	 * the actual data record and all that we are sorting is the indexes.
1430Sstevel@tonic-gate 	 *
1440Sstevel@tonic-gate 	 * The following decision will choose an optimal swap function
1450Sstevel@tonic-gate 	 * based on the size and alignment of the data records
1460Sstevel@tonic-gate 	 *   swapp64	will swap 64 bit pointers
1470Sstevel@tonic-gate 	 *   swapp32	will swap 32 bit pointers
1480Sstevel@tonic-gate 	 *   swapi	will swap an array of 32 bit integers
1490Sstevel@tonic-gate 	 *   swapb	will swap an array of 8 bit characters
1500Sstevel@tonic-gate 	 *
1510Sstevel@tonic-gate 	 * swapi and swapb will also require the variable loops to be set
1520Sstevel@tonic-gate 	 * to control the length of the array being swapped
1530Sstevel@tonic-gate 	 */
1540Sstevel@tonic-gate 	if ((((uintptr_t)basep & (sizeof (uint64_t) - 1)) == 0) &&
1550Sstevel@tonic-gate 	    (rsiz == sizeof (uint64_t))) {
1560Sstevel@tonic-gate 		loops = 1;
1570Sstevel@tonic-gate 		swapf = (void (*)(char *, char *, size_t))swapp64;
1580Sstevel@tonic-gate 	} else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) &&
1590Sstevel@tonic-gate 	    (rsiz == sizeof (uint32_t))) {
1600Sstevel@tonic-gate 		loops = 1;
1610Sstevel@tonic-gate 		swapf = (void (*)(char *, char *, size_t))swapp32;
1620Sstevel@tonic-gate 	} else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) &&
1630Sstevel@tonic-gate 	    ((rsiz & (sizeof (uint32_t) - 1)) == 0)) {
1640Sstevel@tonic-gate 		loops = rsiz / sizeof (int);
1650Sstevel@tonic-gate 		swapf = (void (*)(char *, char *, size_t))swapi;
1660Sstevel@tonic-gate 	} else {
1670Sstevel@tonic-gate 		loops = rsiz;
1680Sstevel@tonic-gate 		swapf = swapb;
1690Sstevel@tonic-gate 	}
1700Sstevel@tonic-gate 
1710Sstevel@tonic-gate 	/*
1720Sstevel@tonic-gate 	 * qsort is a partitioning sort
1730Sstevel@tonic-gate 	 *
1740Sstevel@tonic-gate 	 * the stack is the bookkeeping mechanism to keep track of all
1750Sstevel@tonic-gate 	 * the partitions.
1760Sstevel@tonic-gate 	 *
1770Sstevel@tonic-gate 	 * each sort pass takes one partition and sorts it into two partitions.
1780Sstevel@tonic-gate 	 * at the top of the loop we simply take the partition on the top
1790Sstevel@tonic-gate 	 * of the stack and sort it. See the comments at the bottom
1800Sstevel@tonic-gate 	 * of the loop regarding which partitions to add in what order.
1810Sstevel@tonic-gate 	 *
1820Sstevel@tonic-gate 	 * initially put the whole partition on the stack
1830Sstevel@tonic-gate 	 */
1840Sstevel@tonic-gate 	sp = stack;
1850Sstevel@tonic-gate 	sp->b_lim = (char *)basep;
1860Sstevel@tonic-gate 	sp->nrec = nrec;
1870Sstevel@tonic-gate 	sp++;
1880Sstevel@tonic-gate 	while (sp > stack) {
1890Sstevel@tonic-gate 		sp--;
1900Sstevel@tonic-gate 		b_lim = sp->b_lim;
1910Sstevel@tonic-gate 		nrec = sp->nrec;
1920Sstevel@tonic-gate 
1930Sstevel@tonic-gate 		/*
1940Sstevel@tonic-gate 		 * a linear insertion sort i faster than a qsort for
1950Sstevel@tonic-gate 		 * very small number of records (THRESH_L)
1960Sstevel@tonic-gate 		 *
1970Sstevel@tonic-gate 		 * if number records < threshold use linear insertion sort
1980Sstevel@tonic-gate 		 *
1990Sstevel@tonic-gate 		 * this also handles the special case where the partition
2000Sstevel@tonic-gate 		 * 0 or 1 records length.
2010Sstevel@tonic-gate 		 */
2020Sstevel@tonic-gate 		if (nrec < THRESH_L) {
2030Sstevel@tonic-gate 			/*
2040Sstevel@tonic-gate 			 * Linear insertion sort
2050Sstevel@tonic-gate 			 */
2060Sstevel@tonic-gate 			t_par = b_lim;
2070Sstevel@tonic-gate 			for (i = 1; i < nrec; i++) {
2080Sstevel@tonic-gate 				t_par += rsiz;
2090Sstevel@tonic-gate 				b_par = t_par;
2100Sstevel@tonic-gate 				while (b_par > b_lim) {
2110Sstevel@tonic-gate 					b_par -= rsiz;
2120Sstevel@tonic-gate 					if ((*cmp)(b_par, b_par + rsiz) <= 0) {
2130Sstevel@tonic-gate 						break;
2140Sstevel@tonic-gate 					}
2150Sstevel@tonic-gate 					(*swapf)(b_par, b_par + rsiz, loops);
2160Sstevel@tonic-gate 				}
2170Sstevel@tonic-gate 			}
2180Sstevel@tonic-gate 
2190Sstevel@tonic-gate 			/*
2200Sstevel@tonic-gate 			 * a linear insertion sort will put all records
2210Sstevel@tonic-gate 			 * in their final position and will not create
2220Sstevel@tonic-gate 			 * subpartitions.
2230Sstevel@tonic-gate 			 *
2240Sstevel@tonic-gate 			 * therefore when the insertion sort is complete
2250Sstevel@tonic-gate 			 * just go to the top of the loop and get the
2260Sstevel@tonic-gate 			 * next partition to sort.
2270Sstevel@tonic-gate 			 */
2280Sstevel@tonic-gate 			continue;
2290Sstevel@tonic-gate 		}
2300Sstevel@tonic-gate 
2310Sstevel@tonic-gate 		/* quicksort */
2320Sstevel@tonic-gate 
2330Sstevel@tonic-gate 		/*
2340Sstevel@tonic-gate 		 * choose a pivot record
2350Sstevel@tonic-gate 		 *
2360Sstevel@tonic-gate 		 * Ideally the pivot record will divide the partition
2370Sstevel@tonic-gate 		 * into two equal parts. however we have to balance the
2380Sstevel@tonic-gate 		 * work involved in selecting the pivot record with the
2390Sstevel@tonic-gate 		 * expected benefit.
2400Sstevel@tonic-gate 		 *
2410Sstevel@tonic-gate 		 * The choice of pivot record depends on the number of
2420Sstevel@tonic-gate 		 * records in the partition
2430Sstevel@tonic-gate 		 *
2440Sstevel@tonic-gate 		 * for small partitions (nrec < THRESH_M3)
2450Sstevel@tonic-gate 		 *   we just select the record in the middle of the partition
2460Sstevel@tonic-gate 		 *
2470Sstevel@tonic-gate 		 * if (nrec >= THRESH_M3 && nrec < THRESH_M9)
2480Sstevel@tonic-gate 		 *   we select three values and choose the median of 3
2490Sstevel@tonic-gate 		 *
2500Sstevel@tonic-gate 		 * if (nrec >= THRESH_M9)
2510Sstevel@tonic-gate 		 *   then we use an approximate median of 9
2520Sstevel@tonic-gate 		 *   9 records are selected and grouped in 3 groups of 3
2530Sstevel@tonic-gate 		 *   the median of each of these 3 groups is fed into another
2540Sstevel@tonic-gate 		 *   median of 3 decision.
2550Sstevel@tonic-gate 		 *
2560Sstevel@tonic-gate 		 * Each median of 3 decision is 2 or 3 compares,
2570Sstevel@tonic-gate 		 * so median of 9 costs between 8 and 12 compares.
2580Sstevel@tonic-gate 		 *
2590Sstevel@tonic-gate 		 * i is byte distance between two consecutive samples
2600Sstevel@tonic-gate 		 * m2 will point to the pivot record
2610Sstevel@tonic-gate 		 */
2620Sstevel@tonic-gate 		if (nrec < THRESH_M3) {
2630Sstevel@tonic-gate 			m2 = b_lim + (nrec / 2) * rsiz;
2640Sstevel@tonic-gate 		} else if (nrec < THRESH_M9) {
2650Sstevel@tonic-gate 			/* use median of 3 */
2660Sstevel@tonic-gate 			i = ((nrec - 1) / 2) * rsiz;
2670Sstevel@tonic-gate 			m2 = med3(b_lim, b_lim + i, b_lim + 2 * i);
2680Sstevel@tonic-gate 		} else {
2690Sstevel@tonic-gate 			/* approx median of 9 */
2700Sstevel@tonic-gate 			i = ((nrec - 1) / 8) * rsiz;
2710Sstevel@tonic-gate 			m1 = med3(b_lim, b_lim +  i, b_lim + 2 * i);
2720Sstevel@tonic-gate 			m2 = med3(b_lim + 3 * i, b_lim + 4 * i, b_lim + 5 * i);
2730Sstevel@tonic-gate 			m3 = med3(b_lim + 6 * i, b_lim + 7 * i, b_lim + 8 * i);
2740Sstevel@tonic-gate 			m2 = med3(m1, m2, m3);
2750Sstevel@tonic-gate 		}
2760Sstevel@tonic-gate 
2770Sstevel@tonic-gate 		/*
2780Sstevel@tonic-gate 		 * quick sort partitioning
2790Sstevel@tonic-gate 		 *
2800Sstevel@tonic-gate 		 * The partition limits are defined by bottom and top pointers
2810Sstevel@tonic-gate 		 * b_lim and t_lim.
2820Sstevel@tonic-gate 		 *
2830Sstevel@tonic-gate 		 * qsort uses a fairly standard method of moving the
2840Sstevel@tonic-gate 		 * partitioning pointers, b_par and t_par, to the middle of
2850Sstevel@tonic-gate 		 * the partition and exchanging records that are in the
2860Sstevel@tonic-gate 		 * wrong part of the partition.
2870Sstevel@tonic-gate 		 *
2880Sstevel@tonic-gate 		 * Two enhancements have been made to the basic algorithm.
2890Sstevel@tonic-gate 		 * One for handling duplicate records and one to minimize
2900Sstevel@tonic-gate 		 * the number of swaps.
2910Sstevel@tonic-gate 		 *
2920Sstevel@tonic-gate 		 * Two duplicate records pointers are (b_dup and t_dup) are
2930Sstevel@tonic-gate 		 * initially set to b_lim and t_lim.  Each time a record
2940Sstevel@tonic-gate 		 * whose sort key value is equal to the pivot record is found
2950Sstevel@tonic-gate 		 * it will be swapped with the record pointed to by
2960Sstevel@tonic-gate 		 * b_dup or t_dup and the duplicate pointer will be
2970Sstevel@tonic-gate 		 * incremented toward the center.
2980Sstevel@tonic-gate 		 * When partitioning is complete, all the duplicate records
2990Sstevel@tonic-gate 		 * will have been collected at the upper and lower limits of
3000Sstevel@tonic-gate 		 * the partition and can easily be moved adjacent to the
3010Sstevel@tonic-gate 		 * pivot record.
3020Sstevel@tonic-gate 		 *
3030Sstevel@tonic-gate 		 * The second optimization is to minimize the number of swaps.
3040Sstevel@tonic-gate 		 * The pointer m2 points to the pivot record.
3050Sstevel@tonic-gate 		 * During partitioning, if m2 is ever equal to the partitioning
3060Sstevel@tonic-gate 		 * pointers, b_par or t_par, then b_par or t_par just moves
3070Sstevel@tonic-gate 		 * onto the next record without doing a compare.
3080Sstevel@tonic-gate 		 * If as a result of duplicate record detection,
3090Sstevel@tonic-gate 		 * b_dup or t_dup is ever equal to m2,
3100Sstevel@tonic-gate 		 * then m2 is changed to point to the duplicate record and
3110Sstevel@tonic-gate 		 * b_dup or t_dup is incremented with out swapping records.
3120Sstevel@tonic-gate 		 *
3130Sstevel@tonic-gate 		 * When partitioning is done, we may not have the same pivot
3140Sstevel@tonic-gate 		 * record that we started with, but we will have one with
3150Sstevel@tonic-gate 		 * an equal sort key.
3160Sstevel@tonic-gate 		 */
3170Sstevel@tonic-gate 		b_dup = b_par		= b_lim;
3180Sstevel@tonic-gate 		t_dup = t_par = t_lim	= b_lim + rsiz * (nrec - 1);
3190Sstevel@tonic-gate 		for (;;) {
3200Sstevel@tonic-gate 
3210Sstevel@tonic-gate 			/* move bottom pointer up */
3220Sstevel@tonic-gate 			for (; b_par <= t_par; b_par += rsiz) {
3230Sstevel@tonic-gate 				if (b_par == m2) {
3240Sstevel@tonic-gate 					continue;
3250Sstevel@tonic-gate 				}
3260Sstevel@tonic-gate 				cv = cmp(b_par, m2);
3270Sstevel@tonic-gate 				if (cv > 0) {
3280Sstevel@tonic-gate 					break;
3290Sstevel@tonic-gate 				}
3300Sstevel@tonic-gate 				if (cv == 0) {
3310Sstevel@tonic-gate 					if (b_dup == m2) {
3320Sstevel@tonic-gate 						m2 = b_par;
3330Sstevel@tonic-gate 					} else if (b_dup != b_par) {
3340Sstevel@tonic-gate 						(*swapf)(b_dup, b_par, loops);
3350Sstevel@tonic-gate 					}
3360Sstevel@tonic-gate 					b_dup += rsiz;
3370Sstevel@tonic-gate 				}
3380Sstevel@tonic-gate 			}
3390Sstevel@tonic-gate 
3400Sstevel@tonic-gate 			/* move top pointer down */
3410Sstevel@tonic-gate 			for (; b_par < t_par; t_par -= rsiz) {
3420Sstevel@tonic-gate 				if (t_par == m2) {
3430Sstevel@tonic-gate 					continue;
3440Sstevel@tonic-gate 				}
3450Sstevel@tonic-gate 				cv = cmp(t_par, m2);
3460Sstevel@tonic-gate 				if (cv < 0) {
3470Sstevel@tonic-gate 					break;
3480Sstevel@tonic-gate 				}
3490Sstevel@tonic-gate 				if (cv == 0) {
3500Sstevel@tonic-gate 					if (t_dup == m2) {
3510Sstevel@tonic-gate 						m2 = t_par;
3520Sstevel@tonic-gate 					} else if (t_dup != t_par) {
3530Sstevel@tonic-gate 						(*swapf)(t_dup, t_par, loops);
3540Sstevel@tonic-gate 					}
3550Sstevel@tonic-gate 					t_dup -= rsiz;
3560Sstevel@tonic-gate 				}
3570Sstevel@tonic-gate 			}
3580Sstevel@tonic-gate 
3590Sstevel@tonic-gate 			/* break if we are done partitioning */
3600Sstevel@tonic-gate 			if (b_par >= t_par) {
3610Sstevel@tonic-gate 				break;
3620Sstevel@tonic-gate 			}
3630Sstevel@tonic-gate 
3640Sstevel@tonic-gate 			/* exchange records at upper and lower break points */
3650Sstevel@tonic-gate 			(*swapf)(b_par, t_par, loops);
3660Sstevel@tonic-gate 			b_par += rsiz;
3670Sstevel@tonic-gate 			t_par -= rsiz;
3680Sstevel@tonic-gate 		}
3690Sstevel@tonic-gate 
3700Sstevel@tonic-gate 		/*
3710Sstevel@tonic-gate 		 * partitioning is now complete
3720Sstevel@tonic-gate 		 *
3730Sstevel@tonic-gate 		 * there are two termination conditions from the partitioning
3740Sstevel@tonic-gate 		 * loop above.  Either b_par or t_par have crossed or
3750Sstevel@tonic-gate 		 * they are equal.
3760Sstevel@tonic-gate 		 *
3770Sstevel@tonic-gate 		 * we need to swap the pivot record to its final position
3780Sstevel@tonic-gate 		 * m2 could be in either the upper or lower partitions
3790Sstevel@tonic-gate 		 * or it could already be in its final position
3800Sstevel@tonic-gate 		 */
3810Sstevel@tonic-gate 		/*
3820Sstevel@tonic-gate 		 * R[b_par] > R[m2]
3830Sstevel@tonic-gate 		 * R[t_par] < R[m2]
3840Sstevel@tonic-gate 		 */
3850Sstevel@tonic-gate 		if (t_par < b_par) {
3860Sstevel@tonic-gate 			if (m2 < t_par) {
3870Sstevel@tonic-gate 				(*swapf)(m2, t_par, loops);
3880Sstevel@tonic-gate 				m2 = b_par = t_par;
3890Sstevel@tonic-gate 			} else if (m2 > b_par) {
3900Sstevel@tonic-gate 				(*swapf)(m2, b_par, loops);
3910Sstevel@tonic-gate 				m2 = t_par = b_par;
3920Sstevel@tonic-gate 			} else {
3930Sstevel@tonic-gate 				b_par = t_par = m2;
3940Sstevel@tonic-gate 			}
3950Sstevel@tonic-gate 		} else {
3960Sstevel@tonic-gate 			if (m2 < t_par) {
3970Sstevel@tonic-gate 				t_par = b_par = t_par - rsiz;
3980Sstevel@tonic-gate 			}
3990Sstevel@tonic-gate 			if (m2 != b_par) {
4000Sstevel@tonic-gate 				(*swapf)(m2, b_par, loops);
4010Sstevel@tonic-gate 			}
4020Sstevel@tonic-gate 			m2 = t_par;
4030Sstevel@tonic-gate 		}
4040Sstevel@tonic-gate 
4050Sstevel@tonic-gate 		/*
4060Sstevel@tonic-gate 		 * move bottom duplicates next to pivot
4070Sstevel@tonic-gate 		 * optimized to eliminate overlap
4080Sstevel@tonic-gate 		 */
4090Sstevel@tonic-gate 		d_bytelength = b_dup - b_lim;
4100Sstevel@tonic-gate 		if (b_par - b_dup < d_bytelength) {
4110Sstevel@tonic-gate 			b_dup = b_lim + (b_par - b_dup);
4120Sstevel@tonic-gate 		}
4130Sstevel@tonic-gate 		while (b_dup > b_lim) {
4140Sstevel@tonic-gate 			b_dup -= rsiz;
4150Sstevel@tonic-gate 			b_par -= rsiz;
4160Sstevel@tonic-gate 			(*swapf)(b_dup, b_par, loops);
4170Sstevel@tonic-gate 		}
4180Sstevel@tonic-gate 		b_par = m2 - d_bytelength;
4190Sstevel@tonic-gate 
4200Sstevel@tonic-gate 		/*
4210Sstevel@tonic-gate 		 * move top duplicates next to pivot
4220Sstevel@tonic-gate 		 */
4230Sstevel@tonic-gate 		d_bytelength = t_lim - t_dup;
4240Sstevel@tonic-gate 		if (t_dup - t_par < d_bytelength) {
4250Sstevel@tonic-gate 			t_dup = t_lim - (t_dup - t_par);
4260Sstevel@tonic-gate 		}
4270Sstevel@tonic-gate 		while (t_dup < t_lim) {
4280Sstevel@tonic-gate 			t_dup += rsiz;
4290Sstevel@tonic-gate 			t_par += rsiz;
4300Sstevel@tonic-gate 			(*swapf)(t_dup, t_par, loops);
4310Sstevel@tonic-gate 		}
4320Sstevel@tonic-gate 		t_par = m2 + d_bytelength;
4330Sstevel@tonic-gate 
4340Sstevel@tonic-gate 		/*
4350Sstevel@tonic-gate 		 * when a qsort pass completes there are three partitions
4360Sstevel@tonic-gate 		 * 1) the lower contains all records less than pivot
4370Sstevel@tonic-gate 		 * 2) the upper contains all records greater than pivot
4380Sstevel@tonic-gate 		 * 3) the pivot partition contains all record equal to pivot
4390Sstevel@tonic-gate 		 *
4400Sstevel@tonic-gate 		 * all records in the pivot partition are in their final
4410Sstevel@tonic-gate 		 * position and do not need to be accounted for by the stack
4420Sstevel@tonic-gate 		 *
4430Sstevel@tonic-gate 		 * when adding partitions to the stack
4440Sstevel@tonic-gate 		 * it is important to add the largest partition first
4450Sstevel@tonic-gate 		 * to prevent stack overflow.
4460Sstevel@tonic-gate 		 *
4470Sstevel@tonic-gate 		 * calculate number of unsorted records in top and bottom
4480Sstevel@tonic-gate 		 * push resulting partitions on stack
4490Sstevel@tonic-gate 		 */
4500Sstevel@tonic-gate 		b_nrec = (b_par - b_lim) / rsiz;
4510Sstevel@tonic-gate 		t_nrec = (t_lim - t_par) / rsiz;
4520Sstevel@tonic-gate 		if (b_nrec < t_nrec) {
4530Sstevel@tonic-gate 			sp->b_lim = t_par + rsiz;
4540Sstevel@tonic-gate 			sp->nrec = t_nrec;
4550Sstevel@tonic-gate 			sp++;
4560Sstevel@tonic-gate 			sp->b_lim = b_lim;
4570Sstevel@tonic-gate 			sp->nrec = b_nrec;
4580Sstevel@tonic-gate 			sp++;
4590Sstevel@tonic-gate 		} else {
4600Sstevel@tonic-gate 			sp->b_lim = b_lim;
4610Sstevel@tonic-gate 			sp->nrec = b_nrec;
4620Sstevel@tonic-gate 			sp++;
4630Sstevel@tonic-gate 			sp->b_lim = t_par + rsiz;
4640Sstevel@tonic-gate 			sp->nrec = t_nrec;
4650Sstevel@tonic-gate 			sp++;
4660Sstevel@tonic-gate 		}
4670Sstevel@tonic-gate 	}
4680Sstevel@tonic-gate }
4690Sstevel@tonic-gate 
4700Sstevel@tonic-gate /*
4710Sstevel@tonic-gate  * The following swap functions should not create a stack frame
4720Sstevel@tonic-gate  * the SPARC call / return instruction will be executed
4730Sstevel@tonic-gate  * but the a save / restore will not be executed
4740Sstevel@tonic-gate  * which means we won't do a window turn with the spill / fill overhead
4750Sstevel@tonic-gate  * verify this by examining the assembly code
4760Sstevel@tonic-gate  */
4770Sstevel@tonic-gate 
4780Sstevel@tonic-gate /* ARGSUSED */
4790Sstevel@tonic-gate static void
swapp32(uint32_t * r1,uint32_t * r2,size_t cnt)4800Sstevel@tonic-gate swapp32(uint32_t *r1, uint32_t *r2, size_t cnt)
4810Sstevel@tonic-gate {
4820Sstevel@tonic-gate 	uint32_t temp;
4830Sstevel@tonic-gate 
4840Sstevel@tonic-gate 	temp = *r1;
4850Sstevel@tonic-gate 	*r1++ = *r2;
4860Sstevel@tonic-gate 	*r2++ = temp;
4870Sstevel@tonic-gate }
4880Sstevel@tonic-gate 
4890Sstevel@tonic-gate /* ARGSUSED */
4900Sstevel@tonic-gate static void
swapp64(uint64_t * r1,uint64_t * r2,size_t cnt)4910Sstevel@tonic-gate swapp64(uint64_t *r1, uint64_t *r2, size_t cnt)
4920Sstevel@tonic-gate {
4930Sstevel@tonic-gate 	uint64_t temp;
4940Sstevel@tonic-gate 
4950Sstevel@tonic-gate 	temp = *r1;
4960Sstevel@tonic-gate 	*r1++ = *r2;
4970Sstevel@tonic-gate 	*r2++ = temp;
4980Sstevel@tonic-gate }
4990Sstevel@tonic-gate 
5000Sstevel@tonic-gate static void
swapi(uint32_t * r1,uint32_t * r2,size_t cnt)5010Sstevel@tonic-gate swapi(uint32_t *r1, uint32_t *r2, size_t cnt)
5020Sstevel@tonic-gate {
5030Sstevel@tonic-gate 	uint32_t temp;
5040Sstevel@tonic-gate 
5050Sstevel@tonic-gate 	/* character by character */
5060Sstevel@tonic-gate 	while (cnt--) {
5070Sstevel@tonic-gate 		temp = *r1;
5080Sstevel@tonic-gate 		*r1++ = *r2;
5090Sstevel@tonic-gate 		*r2++ = temp;
5100Sstevel@tonic-gate 	}
5110Sstevel@tonic-gate }
5120Sstevel@tonic-gate 
5130Sstevel@tonic-gate static void
swapb(char * r1,char * r2,size_t cnt)5140Sstevel@tonic-gate swapb(char *r1, char *r2, size_t cnt)
5150Sstevel@tonic-gate {
5160Sstevel@tonic-gate 	char	temp;
5170Sstevel@tonic-gate 
5180Sstevel@tonic-gate 	/* character by character */
5190Sstevel@tonic-gate 	while (cnt--) {
5200Sstevel@tonic-gate 		temp = *r1;
5210Sstevel@tonic-gate 		*r1++ = *r2;
5220Sstevel@tonic-gate 		*r2++ = temp;
5230Sstevel@tonic-gate 	}
5240Sstevel@tonic-gate }
525