xref: /onnv-gate/usr/src/cmd/sort/common/merge.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 1998-2003 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate #include "merge.h"
30*0Sstevel@tonic-gate 
31*0Sstevel@tonic-gate /*
32*0Sstevel@tonic-gate  * External merge sort
33*0Sstevel@tonic-gate  *
34*0Sstevel@tonic-gate  *   The following code implements the merge phase of sort(1) using a heap-based
35*0Sstevel@tonic-gate  *   priority queue.  Fast paths for merging two files as well as outputting a
36*0Sstevel@tonic-gate  *   single file are provided.
37*0Sstevel@tonic-gate  *
38*0Sstevel@tonic-gate  * Memory footprint management
39*0Sstevel@tonic-gate  *
40*0Sstevel@tonic-gate  *   The N-way fan-out of the merge phase can lead to compromising memory
41*0Sstevel@tonic-gate  *   consumption if not constrained, so two mechanisms are used to regulate
42*0Sstevel@tonic-gate  *   the memory footprint during the merge phase:
43*0Sstevel@tonic-gate  *
44*0Sstevel@tonic-gate  *   1.  Single use memory advice.  Since we proceed through each merge file in
45*0Sstevel@tonic-gate  *       order, any line we have output is never required again--at least, not
46*0Sstevel@tonic-gate  *       from that input file.  Accordingly, we use the SOP_RELEASE_LINE()
47*0Sstevel@tonic-gate  *       operation to advise that the memory backing the raw data for the stream
48*0Sstevel@tonic-gate  *       up to that line is no longer of interest.  (For certain classes of
49*0Sstevel@tonic-gate  *       streams, this leads to an madvise(3C) call with the MADV_DONTNEED
50*0Sstevel@tonic-gate  *       flag.)
51*0Sstevel@tonic-gate  *
52*0Sstevel@tonic-gate  *   2.  Number of merge files.  The number of merge files is constrained based
53*0Sstevel@tonic-gate  *       on the amount of physical memory specified via the -S option (or deemed
54*0Sstevel@tonic-gate  *       available based on an inquiry of sysconf(3C) for _SC_AVPHYS_PAGES).
55*0Sstevel@tonic-gate  *       The number of merge files is calculated based on the average resident
56*0Sstevel@tonic-gate  *       size of a stream that supports the SOP_RELEASE_LINE() operation; this
57*0Sstevel@tonic-gate  *       number is conservative for streams that do not support this operation.
58*0Sstevel@tonic-gate  *       A minimum of four subfiles will always be used, resource limits
59*0Sstevel@tonic-gate  *       permitting.
60*0Sstevel@tonic-gate  *
61*0Sstevel@tonic-gate  * Temporary filespace footprint management
62*0Sstevel@tonic-gate  *
63*0Sstevel@tonic-gate  *   Once the merge sort has utilized a temporary file, it may be deleted at
64*0Sstevel@tonic-gate  *   close, as it's not used again and preserving the files until exit may
65*0Sstevel@tonic-gate  *   compromise sort completion when limited temporary space is available.
66*0Sstevel@tonic-gate  */
67*0Sstevel@tonic-gate 
68*0Sstevel@tonic-gate static int pq_N;
69*0Sstevel@tonic-gate static stream_t	**pq_queue;
70*0Sstevel@tonic-gate static int (*pq_coll_fcn)(line_rec_t *, line_rec_t *, ssize_t, flag_t);
71*0Sstevel@tonic-gate 
72*0Sstevel@tonic-gate static ssize_t (*mg_coll_convert)(field_t *, line_rec_t *, flag_t, vchar_t);
73*0Sstevel@tonic-gate 
74*0Sstevel@tonic-gate static int
prepare_output_stream(stream_t * ostrp,sort_t * S)75*0Sstevel@tonic-gate prepare_output_stream(stream_t *ostrp, sort_t *S)
76*0Sstevel@tonic-gate {
77*0Sstevel@tonic-gate 	stream_clear(ostrp);
78*0Sstevel@tonic-gate 	stream_unset(ostrp, STREAM_OPEN);
79*0Sstevel@tonic-gate 
80*0Sstevel@tonic-gate 	stream_set(ostrp,
81*0Sstevel@tonic-gate 	    (S->m_single_byte_locale ? STREAM_SINGLE : STREAM_WIDE) |
82*0Sstevel@tonic-gate 	    (S->m_unique_lines ? STREAM_UNIQUE : 0));
83*0Sstevel@tonic-gate 
84*0Sstevel@tonic-gate 	if (S->m_output_to_stdout) {
85*0Sstevel@tonic-gate 		stream_set(ostrp, STREAM_NOTFILE);
86*0Sstevel@tonic-gate 		ostrp->s_filename = (char *)filename_stdout;
87*0Sstevel@tonic-gate 	} else
88*0Sstevel@tonic-gate 		ostrp->s_filename = S->m_output_filename;
89*0Sstevel@tonic-gate 
90*0Sstevel@tonic-gate 	return (SOP_OPEN_FOR_WRITE(ostrp));
91*0Sstevel@tonic-gate }
92*0Sstevel@tonic-gate 
93*0Sstevel@tonic-gate static void
merge_one_stream(field_t * fields_chain,stream_t * strp,stream_t * outstrp,vchar_t field_separator)94*0Sstevel@tonic-gate merge_one_stream(field_t *fields_chain, stream_t *strp, stream_t *outstrp,
95*0Sstevel@tonic-gate     vchar_t field_separator)
96*0Sstevel@tonic-gate {
97*0Sstevel@tonic-gate 	size_t element_size = strp->s_element_size;
98*0Sstevel@tonic-gate 	size_t initial_size = INITIAL_COLLATION_SIZE * element_size;
99*0Sstevel@tonic-gate 
100*0Sstevel@tonic-gate 	if (strp->s_status & STREAM_SINGLE || strp->s_status & STREAM_WIDE)
101*0Sstevel@tonic-gate 		stream_set(strp, STREAM_INSTANT);
102*0Sstevel@tonic-gate 
103*0Sstevel@tonic-gate 	if (SOP_PRIME(strp) == PRIME_SUCCEEDED) {
104*0Sstevel@tonic-gate 		strp->s_current.l_collate_bufsize = initial_size;
105*0Sstevel@tonic-gate 		strp->s_current.l_collate.sp = safe_realloc(NULL, initial_size);
106*0Sstevel@tonic-gate 
107*0Sstevel@tonic-gate 		(void) mg_coll_convert(fields_chain, &strp->s_current,
108*0Sstevel@tonic-gate 		    FCV_REALLOC, field_separator);
109*0Sstevel@tonic-gate 		SOP_PUT_LINE(outstrp, &strp->s_current);
110*0Sstevel@tonic-gate 		SOP_RELEASE_LINE(strp);
111*0Sstevel@tonic-gate 
112*0Sstevel@tonic-gate 		while (!SOP_EOS(strp)) {
113*0Sstevel@tonic-gate 			SOP_FETCH(strp);
114*0Sstevel@tonic-gate 			if (strp->s_current.l_collate_length == 0)
115*0Sstevel@tonic-gate 				(void) mg_coll_convert(fields_chain,
116*0Sstevel@tonic-gate 				    &strp->s_current, FCV_REALLOC,
117*0Sstevel@tonic-gate 				    field_separator);
118*0Sstevel@tonic-gate 			SOP_PUT_LINE(outstrp, &strp->s_current);
119*0Sstevel@tonic-gate 			SOP_RELEASE_LINE(strp);
120*0Sstevel@tonic-gate 		}
121*0Sstevel@tonic-gate 
122*0Sstevel@tonic-gate 		(void) SOP_CLOSE(strp);
123*0Sstevel@tonic-gate 		SOP_FLUSH(outstrp);
124*0Sstevel@tonic-gate 	}
125*0Sstevel@tonic-gate }
126*0Sstevel@tonic-gate 
127*0Sstevel@tonic-gate static void
merge_two_streams(field_t * fields_chain,stream_t * str_a,stream_t * str_b,stream_t * outstrp,vchar_t field_separator,flag_t coll_flags)128*0Sstevel@tonic-gate merge_two_streams(field_t *fields_chain, stream_t *str_a, stream_t *str_b,
129*0Sstevel@tonic-gate     stream_t *outstrp, vchar_t field_separator, flag_t coll_flags)
130*0Sstevel@tonic-gate {
131*0Sstevel@tonic-gate 	int (*collate_fcn)(line_rec_t *, line_rec_t *, ssize_t, flag_t);
132*0Sstevel@tonic-gate 	size_t element_size = str_a->s_element_size;
133*0Sstevel@tonic-gate 	size_t initial_size = INITIAL_COLLATION_SIZE * element_size;
134*0Sstevel@tonic-gate 
135*0Sstevel@tonic-gate 	ASSERT(str_a->s_element_size == str_b->s_element_size);
136*0Sstevel@tonic-gate 
137*0Sstevel@tonic-gate 	if (str_a->s_element_size == sizeof (char))
138*0Sstevel@tonic-gate 		collate_fcn = collated;
139*0Sstevel@tonic-gate 	else
140*0Sstevel@tonic-gate 		collate_fcn = collated_wide;
141*0Sstevel@tonic-gate 
142*0Sstevel@tonic-gate 	if (str_a->s_status & STREAM_SINGLE || str_a->s_status & STREAM_WIDE)
143*0Sstevel@tonic-gate 		stream_set(str_a, STREAM_INSTANT);
144*0Sstevel@tonic-gate 	if (str_b->s_status & STREAM_SINGLE || str_b->s_status & STREAM_WIDE)
145*0Sstevel@tonic-gate 		stream_set(str_b, STREAM_INSTANT);
146*0Sstevel@tonic-gate 
147*0Sstevel@tonic-gate 	if (SOP_PRIME(str_a) != PRIME_SUCCEEDED) {
148*0Sstevel@tonic-gate 		if (SOP_PRIME(str_b) != PRIME_SUCCEEDED)
149*0Sstevel@tonic-gate 			return;
150*0Sstevel@tonic-gate 
151*0Sstevel@tonic-gate 		merge_one_stream(fields_chain, str_b, outstrp,
152*0Sstevel@tonic-gate 		    field_separator);
153*0Sstevel@tonic-gate 		return;
154*0Sstevel@tonic-gate 	}
155*0Sstevel@tonic-gate 
156*0Sstevel@tonic-gate 	if (SOP_PRIME(str_b) != PRIME_SUCCEEDED) {
157*0Sstevel@tonic-gate 		merge_one_stream(fields_chain, str_a, outstrp,
158*0Sstevel@tonic-gate 		    field_separator);
159*0Sstevel@tonic-gate 		return;
160*0Sstevel@tonic-gate 	}
161*0Sstevel@tonic-gate 
162*0Sstevel@tonic-gate 	str_a->s_current.l_collate_bufsize =
163*0Sstevel@tonic-gate 	    str_b->s_current.l_collate_bufsize = initial_size;
164*0Sstevel@tonic-gate 
165*0Sstevel@tonic-gate 	str_a->s_current.l_collate.sp = safe_realloc(NULL, initial_size);
166*0Sstevel@tonic-gate 	str_b->s_current.l_collate.sp = safe_realloc(NULL, initial_size);
167*0Sstevel@tonic-gate 
168*0Sstevel@tonic-gate 	(void) mg_coll_convert(fields_chain, &str_a->s_current, FCV_REALLOC,
169*0Sstevel@tonic-gate 	    field_separator);
170*0Sstevel@tonic-gate 	(void) mg_coll_convert(fields_chain, &str_b->s_current, FCV_REALLOC,
171*0Sstevel@tonic-gate 	    field_separator);
172*0Sstevel@tonic-gate 
173*0Sstevel@tonic-gate 	for (;;) {
174*0Sstevel@tonic-gate 		if (collate_fcn(&str_a->s_current, &str_b->s_current, 0,
175*0Sstevel@tonic-gate 		    coll_flags) < 0) {
176*0Sstevel@tonic-gate 			SOP_PUT_LINE(outstrp, &str_a->s_current);
177*0Sstevel@tonic-gate 			SOP_RELEASE_LINE(str_a);
178*0Sstevel@tonic-gate 			if (SOP_EOS(str_a)) {
179*0Sstevel@tonic-gate 				(void) SOP_CLOSE(str_a);
180*0Sstevel@tonic-gate 				str_a = str_b;
181*0Sstevel@tonic-gate 				break;
182*0Sstevel@tonic-gate 			}
183*0Sstevel@tonic-gate 			SOP_FETCH(str_a);
184*0Sstevel@tonic-gate 			if (str_a->s_current.l_collate_length != 0)
185*0Sstevel@tonic-gate 				continue;
186*0Sstevel@tonic-gate 			(void) mg_coll_convert(fields_chain, &str_a->s_current,
187*0Sstevel@tonic-gate 			    FCV_REALLOC, field_separator);
188*0Sstevel@tonic-gate 		} else {
189*0Sstevel@tonic-gate 			SOP_PUT_LINE(outstrp, &str_b->s_current);
190*0Sstevel@tonic-gate 			SOP_RELEASE_LINE(str_b);
191*0Sstevel@tonic-gate 			if (SOP_EOS(str_b)) {
192*0Sstevel@tonic-gate 				SOP_CLOSE(str_b);
193*0Sstevel@tonic-gate 				break;
194*0Sstevel@tonic-gate 			}
195*0Sstevel@tonic-gate 			SOP_FETCH(str_b);
196*0Sstevel@tonic-gate 			if (str_b->s_current.l_collate_length != 0)
197*0Sstevel@tonic-gate 				continue;
198*0Sstevel@tonic-gate 			(void) mg_coll_convert(fields_chain, &str_b->s_current,
199*0Sstevel@tonic-gate 			    FCV_REALLOC, field_separator);
200*0Sstevel@tonic-gate 		}
201*0Sstevel@tonic-gate 	}
202*0Sstevel@tonic-gate 
203*0Sstevel@tonic-gate 	SOP_PUT_LINE(outstrp, &str_a->s_current);
204*0Sstevel@tonic-gate 	SOP_RELEASE_LINE(str_a);
205*0Sstevel@tonic-gate 
206*0Sstevel@tonic-gate 	while (!SOP_EOS(str_a)) {
207*0Sstevel@tonic-gate 		SOP_FETCH(str_a);
208*0Sstevel@tonic-gate 		if (str_a->s_current.l_collate_length == 0)
209*0Sstevel@tonic-gate 			(void) mg_coll_convert(fields_chain, &str_a->s_current,
210*0Sstevel@tonic-gate 			    FCV_REALLOC, field_separator);
211*0Sstevel@tonic-gate 		SOP_PUT_LINE(outstrp, &str_a->s_current);
212*0Sstevel@tonic-gate 		SOP_RELEASE_LINE(str_a);
213*0Sstevel@tonic-gate 	}
214*0Sstevel@tonic-gate 
215*0Sstevel@tonic-gate 	(void) SOP_CLOSE(str_a);
216*0Sstevel@tonic-gate 	SOP_FLUSH(outstrp);
217*0Sstevel@tonic-gate }
218*0Sstevel@tonic-gate 
219*0Sstevel@tonic-gate /*
220*0Sstevel@tonic-gate  * priority queue routines
221*0Sstevel@tonic-gate  *   used for merges involving more than two sources
222*0Sstevel@tonic-gate  */
223*0Sstevel@tonic-gate static void
heap_up(stream_t ** A,int k,flag_t coll_flags)224*0Sstevel@tonic-gate heap_up(stream_t **A, int k, flag_t coll_flags)
225*0Sstevel@tonic-gate {
226*0Sstevel@tonic-gate 	while (k > 1 &&
227*0Sstevel@tonic-gate 	    pq_coll_fcn(&A[k / 2]->s_current, &A[k]->s_current, 0,
228*0Sstevel@tonic-gate 	    coll_flags) > 0) {
229*0Sstevel@tonic-gate 		swap((void **)&pq_queue[k], (void **)&pq_queue[k / 2]);
230*0Sstevel@tonic-gate 		k /= 2;
231*0Sstevel@tonic-gate 	}
232*0Sstevel@tonic-gate }
233*0Sstevel@tonic-gate 
234*0Sstevel@tonic-gate static void
heap_down(stream_t ** A,int k,int N,flag_t coll_flags)235*0Sstevel@tonic-gate heap_down(stream_t **A, int k, int N, flag_t coll_flags)
236*0Sstevel@tonic-gate {
237*0Sstevel@tonic-gate 	int	j;
238*0Sstevel@tonic-gate 
239*0Sstevel@tonic-gate 	while (2 * k <= N) {
240*0Sstevel@tonic-gate 		j = 2 * k;
241*0Sstevel@tonic-gate 		if (j < N && pq_coll_fcn(&A[j]->s_current,
242*0Sstevel@tonic-gate 		    &A[j + 1]->s_current, 0, coll_flags) > 0)
243*0Sstevel@tonic-gate 			j++;
244*0Sstevel@tonic-gate 		if (pq_coll_fcn(&A[k]->s_current, &A[j]->s_current, 0,
245*0Sstevel@tonic-gate 		    coll_flags) <= 0)
246*0Sstevel@tonic-gate 			break;
247*0Sstevel@tonic-gate 		swap((void **)&pq_queue[k], (void **)&pq_queue[j]);
248*0Sstevel@tonic-gate 		k = j;
249*0Sstevel@tonic-gate 	}
250*0Sstevel@tonic-gate }
251*0Sstevel@tonic-gate 
252*0Sstevel@tonic-gate static int
pqueue_empty()253*0Sstevel@tonic-gate pqueue_empty()
254*0Sstevel@tonic-gate {
255*0Sstevel@tonic-gate 	return (pq_N == 0);
256*0Sstevel@tonic-gate }
257*0Sstevel@tonic-gate 
258*0Sstevel@tonic-gate static void
pqueue_init(size_t max_size,int (* coll_fcn)(line_rec_t *,line_rec_t *,ssize_t,flag_t))259*0Sstevel@tonic-gate pqueue_init(size_t max_size,
260*0Sstevel@tonic-gate     int (*coll_fcn)(line_rec_t *, line_rec_t *, ssize_t, flag_t))
261*0Sstevel@tonic-gate {
262*0Sstevel@tonic-gate 	pq_queue = safe_realloc(NULL, sizeof (stream_t *) * (max_size + 1));
263*0Sstevel@tonic-gate 	pq_N = 0;
264*0Sstevel@tonic-gate 	pq_coll_fcn = coll_fcn;
265*0Sstevel@tonic-gate }
266*0Sstevel@tonic-gate 
267*0Sstevel@tonic-gate static void
pqueue_insert(stream_t * source,flag_t coll_flags)268*0Sstevel@tonic-gate pqueue_insert(stream_t *source, flag_t coll_flags)
269*0Sstevel@tonic-gate {
270*0Sstevel@tonic-gate 	pq_queue[++pq_N] = source;
271*0Sstevel@tonic-gate 	heap_up(pq_queue, pq_N, coll_flags);
272*0Sstevel@tonic-gate }
273*0Sstevel@tonic-gate 
274*0Sstevel@tonic-gate static stream_t *
pqueue_head(flag_t coll_flags)275*0Sstevel@tonic-gate pqueue_head(flag_t coll_flags)
276*0Sstevel@tonic-gate {
277*0Sstevel@tonic-gate 	swap((void **)&pq_queue[1], (void **)&pq_queue[pq_N]);
278*0Sstevel@tonic-gate 	heap_down(pq_queue, 1, pq_N - 1, coll_flags);
279*0Sstevel@tonic-gate 	return (pq_queue[pq_N--]);
280*0Sstevel@tonic-gate }
281*0Sstevel@tonic-gate 
282*0Sstevel@tonic-gate static void
merge_n_streams(sort_t * S,stream_t * head_streamp,int n_streams,stream_t * out_streamp,flag_t coll_flags)283*0Sstevel@tonic-gate merge_n_streams(sort_t *S, stream_t *head_streamp, int n_streams,
284*0Sstevel@tonic-gate     stream_t *out_streamp, flag_t coll_flags)
285*0Sstevel@tonic-gate {
286*0Sstevel@tonic-gate 	stream_t *top_streamp;
287*0Sstevel@tonic-gate 	stream_t *cur_streamp;
288*0Sstevel@tonic-gate 	stream_t *bot_streamp;
289*0Sstevel@tonic-gate 	stream_t *loop_out_streamp;
290*0Sstevel@tonic-gate 	flag_t is_single_byte = S->m_single_byte_locale;
291*0Sstevel@tonic-gate 
292*0Sstevel@tonic-gate 	int n_opens = 0;
293*0Sstevel@tonic-gate 	int threshold_opens;
294*0Sstevel@tonic-gate 
295*0Sstevel@tonic-gate 	threshold_opens = MAX(4,
296*0Sstevel@tonic-gate 	    2 * S->m_memory_available / DEFAULT_RELEASE_SIZE);
297*0Sstevel@tonic-gate 
298*0Sstevel@tonic-gate 	pqueue_init(n_streams, is_single_byte ? collated : collated_wide);
299*0Sstevel@tonic-gate 
300*0Sstevel@tonic-gate 	top_streamp = bot_streamp = head_streamp;
301*0Sstevel@tonic-gate 
302*0Sstevel@tonic-gate 	for (;;) {
303*0Sstevel@tonic-gate 		hold_file_descriptor();
304*0Sstevel@tonic-gate 		while (bot_streamp != NULL) {
305*0Sstevel@tonic-gate 
306*0Sstevel@tonic-gate 			if (n_opens > threshold_opens ||
307*0Sstevel@tonic-gate 			    stream_open_for_read(S, bot_streamp) == -1) {
308*0Sstevel@tonic-gate 				/*
309*0Sstevel@tonic-gate 				 * Available file descriptors would exceed
310*0Sstevel@tonic-gate 				 * memory target or have been exhausted; back
311*0Sstevel@tonic-gate 				 * off to the last valid, primed stream.
312*0Sstevel@tonic-gate 				 */
313*0Sstevel@tonic-gate 				bot_streamp = bot_streamp->s_previous;
314*0Sstevel@tonic-gate 				break;
315*0Sstevel@tonic-gate 			}
316*0Sstevel@tonic-gate 
317*0Sstevel@tonic-gate 			if (bot_streamp->s_status & STREAM_SINGLE ||
318*0Sstevel@tonic-gate 			    bot_streamp->s_status & STREAM_WIDE)
319*0Sstevel@tonic-gate 				stream_set(bot_streamp, STREAM_INSTANT);
320*0Sstevel@tonic-gate 
321*0Sstevel@tonic-gate 			bot_streamp = bot_streamp->s_next;
322*0Sstevel@tonic-gate 			n_opens++;
323*0Sstevel@tonic-gate 		}
324*0Sstevel@tonic-gate 		release_file_descriptor();
325*0Sstevel@tonic-gate 
326*0Sstevel@tonic-gate 		if (bot_streamp == NULL) {
327*0Sstevel@tonic-gate 			if (prepare_output_stream(out_streamp, S) != -1)
328*0Sstevel@tonic-gate 				loop_out_streamp = out_streamp;
329*0Sstevel@tonic-gate 			else
330*0Sstevel@tonic-gate 				die(EMSG_DESCRIPTORS);
331*0Sstevel@tonic-gate 		} else {
332*0Sstevel@tonic-gate 			loop_out_streamp = stream_push_to_temporary(
333*0Sstevel@tonic-gate 			    &head_streamp, NULL, ST_OPEN | ST_NOCACHE |
334*0Sstevel@tonic-gate 			    (is_single_byte ? 0 : ST_WIDE));
335*0Sstevel@tonic-gate 
336*0Sstevel@tonic-gate 			if (loop_out_streamp == NULL ||
337*0Sstevel@tonic-gate 			    top_streamp == bot_streamp)
338*0Sstevel@tonic-gate 				/*
339*0Sstevel@tonic-gate 				 * We need three file descriptors to make
340*0Sstevel@tonic-gate 				 * progress; if top_streamp == bot_streamp, then
341*0Sstevel@tonic-gate 				 * we have only two.
342*0Sstevel@tonic-gate 				 */
343*0Sstevel@tonic-gate 				die(EMSG_DESCRIPTORS);
344*0Sstevel@tonic-gate 		}
345*0Sstevel@tonic-gate 
346*0Sstevel@tonic-gate 		for (cur_streamp = top_streamp; cur_streamp != bot_streamp;
347*0Sstevel@tonic-gate 		    cur_streamp = cur_streamp->s_next) {
348*0Sstevel@tonic-gate 			/*
349*0Sstevel@tonic-gate 			 * Empty stream?
350*0Sstevel@tonic-gate 			 */
351*0Sstevel@tonic-gate 			if (!(cur_streamp->s_status & STREAM_ARRAY) &&
352*0Sstevel@tonic-gate 			    SOP_EOS(cur_streamp)) {
353*0Sstevel@tonic-gate 				stream_unlink_temporary(cur_streamp);
354*0Sstevel@tonic-gate 				continue;
355*0Sstevel@tonic-gate 			}
356*0Sstevel@tonic-gate 
357*0Sstevel@tonic-gate 			/*
358*0Sstevel@tonic-gate 			 * Given that stream is not empty, any error in priming
359*0Sstevel@tonic-gate 			 * must be fatal.
360*0Sstevel@tonic-gate 			 */
361*0Sstevel@tonic-gate 			if (SOP_PRIME(cur_streamp) != PRIME_SUCCEEDED)
362*0Sstevel@tonic-gate 				die(EMSG_BADPRIME);
363*0Sstevel@tonic-gate 
364*0Sstevel@tonic-gate 			cur_streamp->s_current.l_collate_bufsize =
365*0Sstevel@tonic-gate 				INITIAL_COLLATION_SIZE;
366*0Sstevel@tonic-gate 			cur_streamp->s_current.l_collate.sp =
367*0Sstevel@tonic-gate 			    safe_realloc(NULL, INITIAL_COLLATION_SIZE);
368*0Sstevel@tonic-gate 			(void) mg_coll_convert(S->m_fields_head,
369*0Sstevel@tonic-gate 			    &cur_streamp->s_current, FCV_REALLOC,
370*0Sstevel@tonic-gate 			    S->m_field_separator);
371*0Sstevel@tonic-gate 
372*0Sstevel@tonic-gate 			pqueue_insert(cur_streamp, coll_flags);
373*0Sstevel@tonic-gate 		}
374*0Sstevel@tonic-gate 
375*0Sstevel@tonic-gate 		while (!pqueue_empty()) {
376*0Sstevel@tonic-gate 			cur_streamp = pqueue_head(coll_flags);
377*0Sstevel@tonic-gate 
378*0Sstevel@tonic-gate 			SOP_PUT_LINE(loop_out_streamp, &cur_streamp->s_current);
379*0Sstevel@tonic-gate 			SOP_RELEASE_LINE(cur_streamp);
380*0Sstevel@tonic-gate 
381*0Sstevel@tonic-gate 			if (!SOP_EOS(cur_streamp)) {
382*0Sstevel@tonic-gate 				SOP_FETCH(cur_streamp);
383*0Sstevel@tonic-gate 				(void) mg_coll_convert(S->m_fields_head,
384*0Sstevel@tonic-gate 				    &cur_streamp->s_current, FCV_REALLOC,
385*0Sstevel@tonic-gate 				    S->m_field_separator);
386*0Sstevel@tonic-gate 				pqueue_insert(cur_streamp, coll_flags);
387*0Sstevel@tonic-gate 			}
388*0Sstevel@tonic-gate 		}
389*0Sstevel@tonic-gate 
390*0Sstevel@tonic-gate 		cur_streamp = top_streamp;
391*0Sstevel@tonic-gate 		while (cur_streamp != bot_streamp) {
392*0Sstevel@tonic-gate 			if (!(cur_streamp->s_status & STREAM_ARRAY))
393*0Sstevel@tonic-gate 				safe_free(cur_streamp->s_current.l_collate.sp);
394*0Sstevel@tonic-gate 			cur_streamp->s_current.l_collate.sp = NULL;
395*0Sstevel@tonic-gate 
396*0Sstevel@tonic-gate 			(void) SOP_FREE(cur_streamp);
397*0Sstevel@tonic-gate 			stream_unlink_temporary(cur_streamp);
398*0Sstevel@tonic-gate 			(void) SOP_CLOSE(cur_streamp);
399*0Sstevel@tonic-gate 
400*0Sstevel@tonic-gate 			cur_streamp = cur_streamp->s_next;
401*0Sstevel@tonic-gate 		}
402*0Sstevel@tonic-gate 
403*0Sstevel@tonic-gate 		(void) SOP_FLUSH(loop_out_streamp);
404*0Sstevel@tonic-gate 
405*0Sstevel@tonic-gate 		if (bot_streamp == NULL)
406*0Sstevel@tonic-gate 			break;
407*0Sstevel@tonic-gate 
408*0Sstevel@tonic-gate 		if (!(loop_out_streamp->s_status & STREAM_NOTFILE)) {
409*0Sstevel@tonic-gate 			(void) SOP_CLOSE(loop_out_streamp);
410*0Sstevel@tonic-gate 			/*
411*0Sstevel@tonic-gate 			 * Get file size so that we may treat intermediate files
412*0Sstevel@tonic-gate 			 * with our stream_mmap facilities.
413*0Sstevel@tonic-gate 			 */
414*0Sstevel@tonic-gate 			stream_stat_chain(loop_out_streamp);
415*0Sstevel@tonic-gate 			__S(stats_incr_merge_files());
416*0Sstevel@tonic-gate 		}
417*0Sstevel@tonic-gate 
418*0Sstevel@tonic-gate 		n_opens = 0;
419*0Sstevel@tonic-gate 
420*0Sstevel@tonic-gate 		top_streamp = bot_streamp;
421*0Sstevel@tonic-gate 		bot_streamp = bot_streamp->s_next;
422*0Sstevel@tonic-gate 	}
423*0Sstevel@tonic-gate }
424*0Sstevel@tonic-gate 
425*0Sstevel@tonic-gate void
merge(sort_t * S)426*0Sstevel@tonic-gate merge(sort_t *S)
427*0Sstevel@tonic-gate {
428*0Sstevel@tonic-gate 	stream_t *merge_chain;
429*0Sstevel@tonic-gate 	stream_t *cur_streamp;
430*0Sstevel@tonic-gate 	stream_t out_stream;
431*0Sstevel@tonic-gate 	uint_t n_merges;
432*0Sstevel@tonic-gate 	flag_t coll_flags;
433*0Sstevel@tonic-gate 
434*0Sstevel@tonic-gate 	if (S->m_merge_only) {
435*0Sstevel@tonic-gate 		merge_chain = S->m_input_streams;
436*0Sstevel@tonic-gate 		set_cleanup_chain(&S->m_input_streams);
437*0Sstevel@tonic-gate 	} else {
438*0Sstevel@tonic-gate 		/*
439*0Sstevel@tonic-gate 		 * Otherwise we're inheriting the temporary output files from
440*0Sstevel@tonic-gate 		 * our internal sort.
441*0Sstevel@tonic-gate 		 */
442*0Sstevel@tonic-gate 		merge_chain = S->m_temporary_streams;
443*0Sstevel@tonic-gate 		stream_stat_chain(merge_chain);
444*0Sstevel@tonic-gate 		__S(stats_set_merge_files(stream_count_chain(merge_chain)));
445*0Sstevel@tonic-gate 	}
446*0Sstevel@tonic-gate 
447*0Sstevel@tonic-gate 	if (S->m_field_options & FIELD_REVERSE_COMPARISONS)
448*0Sstevel@tonic-gate 		coll_flags = COLL_REVERSE;
449*0Sstevel@tonic-gate 	else
450*0Sstevel@tonic-gate 		coll_flags = 0;
451*0Sstevel@tonic-gate 	if (S->m_entire_line)
452*0Sstevel@tonic-gate 		coll_flags |= COLL_UNIQUE;
453*0Sstevel@tonic-gate 
454*0Sstevel@tonic-gate 	n_merges = stream_count_chain(merge_chain);
455*0Sstevel@tonic-gate 
456*0Sstevel@tonic-gate 	mg_coll_convert = S->m_coll_convert;
457*0Sstevel@tonic-gate 	cur_streamp = merge_chain;
458*0Sstevel@tonic-gate 
459*0Sstevel@tonic-gate 	switch (n_merges) {
460*0Sstevel@tonic-gate 		case 0:
461*0Sstevel@tonic-gate 			/*
462*0Sstevel@tonic-gate 			 * No files for merge.
463*0Sstevel@tonic-gate 			 */
464*0Sstevel@tonic-gate 			warn(gettext("no files available to merge\n"));
465*0Sstevel@tonic-gate 			break;
466*0Sstevel@tonic-gate 		case 1:
467*0Sstevel@tonic-gate 			/*
468*0Sstevel@tonic-gate 			 * Fast path: only one file for merge.
469*0Sstevel@tonic-gate 			 */
470*0Sstevel@tonic-gate 			(void) stream_open_for_read(S, cur_streamp);
471*0Sstevel@tonic-gate 			(void) prepare_output_stream(&out_stream, S);
472*0Sstevel@tonic-gate 			merge_one_stream(S->m_fields_head, cur_streamp,
473*0Sstevel@tonic-gate 			    &out_stream, S->m_field_separator);
474*0Sstevel@tonic-gate 			break;
475*0Sstevel@tonic-gate 		case 2:
476*0Sstevel@tonic-gate 			/*
477*0Sstevel@tonic-gate 			 * Fast path: only two files for merge.
478*0Sstevel@tonic-gate 			 */
479*0Sstevel@tonic-gate 			(void) stream_open_for_read(S, cur_streamp);
480*0Sstevel@tonic-gate 			(void) stream_open_for_read(S, cur_streamp->s_next);
481*0Sstevel@tonic-gate 			if (prepare_output_stream(&out_stream, S) == -1)
482*0Sstevel@tonic-gate 				die(EMSG_DESCRIPTORS);
483*0Sstevel@tonic-gate 			merge_two_streams(S->m_fields_head, cur_streamp,
484*0Sstevel@tonic-gate 			    cur_streamp->s_next, &out_stream,
485*0Sstevel@tonic-gate 			    S->m_field_separator, coll_flags);
486*0Sstevel@tonic-gate 			break;
487*0Sstevel@tonic-gate 		default:
488*0Sstevel@tonic-gate 			/*
489*0Sstevel@tonic-gate 			 * Full merge.
490*0Sstevel@tonic-gate 			 */
491*0Sstevel@tonic-gate 			merge_n_streams(S, cur_streamp, n_merges, &out_stream,
492*0Sstevel@tonic-gate 			    coll_flags);
493*0Sstevel@tonic-gate 			break;
494*0Sstevel@tonic-gate 	}
495*0Sstevel@tonic-gate 
496*0Sstevel@tonic-gate 	remove_output_guard();
497*0Sstevel@tonic-gate }
498