1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate * CDDL HEADER START
3*0Sstevel@tonic-gate *
4*0Sstevel@tonic-gate * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance
7*0Sstevel@tonic-gate * with the License.
8*0Sstevel@tonic-gate *
9*0Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate * and limitations under the License.
13*0Sstevel@tonic-gate *
14*0Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate *
20*0Sstevel@tonic-gate * CDDL HEADER END
21*0Sstevel@tonic-gate */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate * Copyright 1998-2003 Sun Microsystems, Inc. All rights reserved.
24*0Sstevel@tonic-gate * Use is subject to license terms.
25*0Sstevel@tonic-gate */
26*0Sstevel@tonic-gate
27*0Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI"
28*0Sstevel@tonic-gate
29*0Sstevel@tonic-gate #include "merge.h"
30*0Sstevel@tonic-gate
31*0Sstevel@tonic-gate /*
32*0Sstevel@tonic-gate * External merge sort
33*0Sstevel@tonic-gate *
34*0Sstevel@tonic-gate * The following code implements the merge phase of sort(1) using a heap-based
35*0Sstevel@tonic-gate * priority queue. Fast paths for merging two files as well as outputting a
36*0Sstevel@tonic-gate * single file are provided.
37*0Sstevel@tonic-gate *
38*0Sstevel@tonic-gate * Memory footprint management
39*0Sstevel@tonic-gate *
40*0Sstevel@tonic-gate * The N-way fan-out of the merge phase can lead to compromising memory
41*0Sstevel@tonic-gate * consumption if not constrained, so two mechanisms are used to regulate
42*0Sstevel@tonic-gate * the memory footprint during the merge phase:
43*0Sstevel@tonic-gate *
44*0Sstevel@tonic-gate * 1. Single use memory advice. Since we proceed through each merge file in
45*0Sstevel@tonic-gate * order, any line we have output is never required again--at least, not
46*0Sstevel@tonic-gate * from that input file. Accordingly, we use the SOP_RELEASE_LINE()
47*0Sstevel@tonic-gate * operation to advise that the memory backing the raw data for the stream
48*0Sstevel@tonic-gate * up to that line is no longer of interest. (For certain classes of
49*0Sstevel@tonic-gate * streams, this leads to an madvise(3C) call with the MADV_DONTNEED
50*0Sstevel@tonic-gate * flag.)
51*0Sstevel@tonic-gate *
52*0Sstevel@tonic-gate * 2. Number of merge files. The number of merge files is constrained based
53*0Sstevel@tonic-gate * on the amount of physical memory specified via the -S option (or deemed
54*0Sstevel@tonic-gate * available based on an inquiry of sysconf(3C) for _SC_AVPHYS_PAGES).
55*0Sstevel@tonic-gate * The number of merge files is calculated based on the average resident
56*0Sstevel@tonic-gate * size of a stream that supports the SOP_RELEASE_LINE() operation; this
57*0Sstevel@tonic-gate * number is conservative for streams that do not support this operation.
58*0Sstevel@tonic-gate * A minimum of four subfiles will always be used, resource limits
59*0Sstevel@tonic-gate * permitting.
60*0Sstevel@tonic-gate *
61*0Sstevel@tonic-gate * Temporary filespace footprint management
62*0Sstevel@tonic-gate *
63*0Sstevel@tonic-gate * Once the merge sort has utilized a temporary file, it may be deleted at
64*0Sstevel@tonic-gate * close, as it's not used again and preserving the files until exit may
65*0Sstevel@tonic-gate * compromise sort completion when limited temporary space is available.
66*0Sstevel@tonic-gate */
67*0Sstevel@tonic-gate
68*0Sstevel@tonic-gate static int pq_N;
69*0Sstevel@tonic-gate static stream_t **pq_queue;
70*0Sstevel@tonic-gate static int (*pq_coll_fcn)(line_rec_t *, line_rec_t *, ssize_t, flag_t);
71*0Sstevel@tonic-gate
72*0Sstevel@tonic-gate static ssize_t (*mg_coll_convert)(field_t *, line_rec_t *, flag_t, vchar_t);
73*0Sstevel@tonic-gate
74*0Sstevel@tonic-gate static int
prepare_output_stream(stream_t * ostrp,sort_t * S)75*0Sstevel@tonic-gate prepare_output_stream(stream_t *ostrp, sort_t *S)
76*0Sstevel@tonic-gate {
77*0Sstevel@tonic-gate stream_clear(ostrp);
78*0Sstevel@tonic-gate stream_unset(ostrp, STREAM_OPEN);
79*0Sstevel@tonic-gate
80*0Sstevel@tonic-gate stream_set(ostrp,
81*0Sstevel@tonic-gate (S->m_single_byte_locale ? STREAM_SINGLE : STREAM_WIDE) |
82*0Sstevel@tonic-gate (S->m_unique_lines ? STREAM_UNIQUE : 0));
83*0Sstevel@tonic-gate
84*0Sstevel@tonic-gate if (S->m_output_to_stdout) {
85*0Sstevel@tonic-gate stream_set(ostrp, STREAM_NOTFILE);
86*0Sstevel@tonic-gate ostrp->s_filename = (char *)filename_stdout;
87*0Sstevel@tonic-gate } else
88*0Sstevel@tonic-gate ostrp->s_filename = S->m_output_filename;
89*0Sstevel@tonic-gate
90*0Sstevel@tonic-gate return (SOP_OPEN_FOR_WRITE(ostrp));
91*0Sstevel@tonic-gate }
92*0Sstevel@tonic-gate
93*0Sstevel@tonic-gate static void
merge_one_stream(field_t * fields_chain,stream_t * strp,stream_t * outstrp,vchar_t field_separator)94*0Sstevel@tonic-gate merge_one_stream(field_t *fields_chain, stream_t *strp, stream_t *outstrp,
95*0Sstevel@tonic-gate vchar_t field_separator)
96*0Sstevel@tonic-gate {
97*0Sstevel@tonic-gate size_t element_size = strp->s_element_size;
98*0Sstevel@tonic-gate size_t initial_size = INITIAL_COLLATION_SIZE * element_size;
99*0Sstevel@tonic-gate
100*0Sstevel@tonic-gate if (strp->s_status & STREAM_SINGLE || strp->s_status & STREAM_WIDE)
101*0Sstevel@tonic-gate stream_set(strp, STREAM_INSTANT);
102*0Sstevel@tonic-gate
103*0Sstevel@tonic-gate if (SOP_PRIME(strp) == PRIME_SUCCEEDED) {
104*0Sstevel@tonic-gate strp->s_current.l_collate_bufsize = initial_size;
105*0Sstevel@tonic-gate strp->s_current.l_collate.sp = safe_realloc(NULL, initial_size);
106*0Sstevel@tonic-gate
107*0Sstevel@tonic-gate (void) mg_coll_convert(fields_chain, &strp->s_current,
108*0Sstevel@tonic-gate FCV_REALLOC, field_separator);
109*0Sstevel@tonic-gate SOP_PUT_LINE(outstrp, &strp->s_current);
110*0Sstevel@tonic-gate SOP_RELEASE_LINE(strp);
111*0Sstevel@tonic-gate
112*0Sstevel@tonic-gate while (!SOP_EOS(strp)) {
113*0Sstevel@tonic-gate SOP_FETCH(strp);
114*0Sstevel@tonic-gate if (strp->s_current.l_collate_length == 0)
115*0Sstevel@tonic-gate (void) mg_coll_convert(fields_chain,
116*0Sstevel@tonic-gate &strp->s_current, FCV_REALLOC,
117*0Sstevel@tonic-gate field_separator);
118*0Sstevel@tonic-gate SOP_PUT_LINE(outstrp, &strp->s_current);
119*0Sstevel@tonic-gate SOP_RELEASE_LINE(strp);
120*0Sstevel@tonic-gate }
121*0Sstevel@tonic-gate
122*0Sstevel@tonic-gate (void) SOP_CLOSE(strp);
123*0Sstevel@tonic-gate SOP_FLUSH(outstrp);
124*0Sstevel@tonic-gate }
125*0Sstevel@tonic-gate }
126*0Sstevel@tonic-gate
127*0Sstevel@tonic-gate static void
merge_two_streams(field_t * fields_chain,stream_t * str_a,stream_t * str_b,stream_t * outstrp,vchar_t field_separator,flag_t coll_flags)128*0Sstevel@tonic-gate merge_two_streams(field_t *fields_chain, stream_t *str_a, stream_t *str_b,
129*0Sstevel@tonic-gate stream_t *outstrp, vchar_t field_separator, flag_t coll_flags)
130*0Sstevel@tonic-gate {
131*0Sstevel@tonic-gate int (*collate_fcn)(line_rec_t *, line_rec_t *, ssize_t, flag_t);
132*0Sstevel@tonic-gate size_t element_size = str_a->s_element_size;
133*0Sstevel@tonic-gate size_t initial_size = INITIAL_COLLATION_SIZE * element_size;
134*0Sstevel@tonic-gate
135*0Sstevel@tonic-gate ASSERT(str_a->s_element_size == str_b->s_element_size);
136*0Sstevel@tonic-gate
137*0Sstevel@tonic-gate if (str_a->s_element_size == sizeof (char))
138*0Sstevel@tonic-gate collate_fcn = collated;
139*0Sstevel@tonic-gate else
140*0Sstevel@tonic-gate collate_fcn = collated_wide;
141*0Sstevel@tonic-gate
142*0Sstevel@tonic-gate if (str_a->s_status & STREAM_SINGLE || str_a->s_status & STREAM_WIDE)
143*0Sstevel@tonic-gate stream_set(str_a, STREAM_INSTANT);
144*0Sstevel@tonic-gate if (str_b->s_status & STREAM_SINGLE || str_b->s_status & STREAM_WIDE)
145*0Sstevel@tonic-gate stream_set(str_b, STREAM_INSTANT);
146*0Sstevel@tonic-gate
147*0Sstevel@tonic-gate if (SOP_PRIME(str_a) != PRIME_SUCCEEDED) {
148*0Sstevel@tonic-gate if (SOP_PRIME(str_b) != PRIME_SUCCEEDED)
149*0Sstevel@tonic-gate return;
150*0Sstevel@tonic-gate
151*0Sstevel@tonic-gate merge_one_stream(fields_chain, str_b, outstrp,
152*0Sstevel@tonic-gate field_separator);
153*0Sstevel@tonic-gate return;
154*0Sstevel@tonic-gate }
155*0Sstevel@tonic-gate
156*0Sstevel@tonic-gate if (SOP_PRIME(str_b) != PRIME_SUCCEEDED) {
157*0Sstevel@tonic-gate merge_one_stream(fields_chain, str_a, outstrp,
158*0Sstevel@tonic-gate field_separator);
159*0Sstevel@tonic-gate return;
160*0Sstevel@tonic-gate }
161*0Sstevel@tonic-gate
162*0Sstevel@tonic-gate str_a->s_current.l_collate_bufsize =
163*0Sstevel@tonic-gate str_b->s_current.l_collate_bufsize = initial_size;
164*0Sstevel@tonic-gate
165*0Sstevel@tonic-gate str_a->s_current.l_collate.sp = safe_realloc(NULL, initial_size);
166*0Sstevel@tonic-gate str_b->s_current.l_collate.sp = safe_realloc(NULL, initial_size);
167*0Sstevel@tonic-gate
168*0Sstevel@tonic-gate (void) mg_coll_convert(fields_chain, &str_a->s_current, FCV_REALLOC,
169*0Sstevel@tonic-gate field_separator);
170*0Sstevel@tonic-gate (void) mg_coll_convert(fields_chain, &str_b->s_current, FCV_REALLOC,
171*0Sstevel@tonic-gate field_separator);
172*0Sstevel@tonic-gate
173*0Sstevel@tonic-gate for (;;) {
174*0Sstevel@tonic-gate if (collate_fcn(&str_a->s_current, &str_b->s_current, 0,
175*0Sstevel@tonic-gate coll_flags) < 0) {
176*0Sstevel@tonic-gate SOP_PUT_LINE(outstrp, &str_a->s_current);
177*0Sstevel@tonic-gate SOP_RELEASE_LINE(str_a);
178*0Sstevel@tonic-gate if (SOP_EOS(str_a)) {
179*0Sstevel@tonic-gate (void) SOP_CLOSE(str_a);
180*0Sstevel@tonic-gate str_a = str_b;
181*0Sstevel@tonic-gate break;
182*0Sstevel@tonic-gate }
183*0Sstevel@tonic-gate SOP_FETCH(str_a);
184*0Sstevel@tonic-gate if (str_a->s_current.l_collate_length != 0)
185*0Sstevel@tonic-gate continue;
186*0Sstevel@tonic-gate (void) mg_coll_convert(fields_chain, &str_a->s_current,
187*0Sstevel@tonic-gate FCV_REALLOC, field_separator);
188*0Sstevel@tonic-gate } else {
189*0Sstevel@tonic-gate SOP_PUT_LINE(outstrp, &str_b->s_current);
190*0Sstevel@tonic-gate SOP_RELEASE_LINE(str_b);
191*0Sstevel@tonic-gate if (SOP_EOS(str_b)) {
192*0Sstevel@tonic-gate SOP_CLOSE(str_b);
193*0Sstevel@tonic-gate break;
194*0Sstevel@tonic-gate }
195*0Sstevel@tonic-gate SOP_FETCH(str_b);
196*0Sstevel@tonic-gate if (str_b->s_current.l_collate_length != 0)
197*0Sstevel@tonic-gate continue;
198*0Sstevel@tonic-gate (void) mg_coll_convert(fields_chain, &str_b->s_current,
199*0Sstevel@tonic-gate FCV_REALLOC, field_separator);
200*0Sstevel@tonic-gate }
201*0Sstevel@tonic-gate }
202*0Sstevel@tonic-gate
203*0Sstevel@tonic-gate SOP_PUT_LINE(outstrp, &str_a->s_current);
204*0Sstevel@tonic-gate SOP_RELEASE_LINE(str_a);
205*0Sstevel@tonic-gate
206*0Sstevel@tonic-gate while (!SOP_EOS(str_a)) {
207*0Sstevel@tonic-gate SOP_FETCH(str_a);
208*0Sstevel@tonic-gate if (str_a->s_current.l_collate_length == 0)
209*0Sstevel@tonic-gate (void) mg_coll_convert(fields_chain, &str_a->s_current,
210*0Sstevel@tonic-gate FCV_REALLOC, field_separator);
211*0Sstevel@tonic-gate SOP_PUT_LINE(outstrp, &str_a->s_current);
212*0Sstevel@tonic-gate SOP_RELEASE_LINE(str_a);
213*0Sstevel@tonic-gate }
214*0Sstevel@tonic-gate
215*0Sstevel@tonic-gate (void) SOP_CLOSE(str_a);
216*0Sstevel@tonic-gate SOP_FLUSH(outstrp);
217*0Sstevel@tonic-gate }
218*0Sstevel@tonic-gate
219*0Sstevel@tonic-gate /*
220*0Sstevel@tonic-gate * priority queue routines
221*0Sstevel@tonic-gate * used for merges involving more than two sources
222*0Sstevel@tonic-gate */
223*0Sstevel@tonic-gate static void
heap_up(stream_t ** A,int k,flag_t coll_flags)224*0Sstevel@tonic-gate heap_up(stream_t **A, int k, flag_t coll_flags)
225*0Sstevel@tonic-gate {
226*0Sstevel@tonic-gate while (k > 1 &&
227*0Sstevel@tonic-gate pq_coll_fcn(&A[k / 2]->s_current, &A[k]->s_current, 0,
228*0Sstevel@tonic-gate coll_flags) > 0) {
229*0Sstevel@tonic-gate swap((void **)&pq_queue[k], (void **)&pq_queue[k / 2]);
230*0Sstevel@tonic-gate k /= 2;
231*0Sstevel@tonic-gate }
232*0Sstevel@tonic-gate }
233*0Sstevel@tonic-gate
234*0Sstevel@tonic-gate static void
heap_down(stream_t ** A,int k,int N,flag_t coll_flags)235*0Sstevel@tonic-gate heap_down(stream_t **A, int k, int N, flag_t coll_flags)
236*0Sstevel@tonic-gate {
237*0Sstevel@tonic-gate int j;
238*0Sstevel@tonic-gate
239*0Sstevel@tonic-gate while (2 * k <= N) {
240*0Sstevel@tonic-gate j = 2 * k;
241*0Sstevel@tonic-gate if (j < N && pq_coll_fcn(&A[j]->s_current,
242*0Sstevel@tonic-gate &A[j + 1]->s_current, 0, coll_flags) > 0)
243*0Sstevel@tonic-gate j++;
244*0Sstevel@tonic-gate if (pq_coll_fcn(&A[k]->s_current, &A[j]->s_current, 0,
245*0Sstevel@tonic-gate coll_flags) <= 0)
246*0Sstevel@tonic-gate break;
247*0Sstevel@tonic-gate swap((void **)&pq_queue[k], (void **)&pq_queue[j]);
248*0Sstevel@tonic-gate k = j;
249*0Sstevel@tonic-gate }
250*0Sstevel@tonic-gate }
251*0Sstevel@tonic-gate
252*0Sstevel@tonic-gate static int
pqueue_empty()253*0Sstevel@tonic-gate pqueue_empty()
254*0Sstevel@tonic-gate {
255*0Sstevel@tonic-gate return (pq_N == 0);
256*0Sstevel@tonic-gate }
257*0Sstevel@tonic-gate
258*0Sstevel@tonic-gate static void
pqueue_init(size_t max_size,int (* coll_fcn)(line_rec_t *,line_rec_t *,ssize_t,flag_t))259*0Sstevel@tonic-gate pqueue_init(size_t max_size,
260*0Sstevel@tonic-gate int (*coll_fcn)(line_rec_t *, line_rec_t *, ssize_t, flag_t))
261*0Sstevel@tonic-gate {
262*0Sstevel@tonic-gate pq_queue = safe_realloc(NULL, sizeof (stream_t *) * (max_size + 1));
263*0Sstevel@tonic-gate pq_N = 0;
264*0Sstevel@tonic-gate pq_coll_fcn = coll_fcn;
265*0Sstevel@tonic-gate }
266*0Sstevel@tonic-gate
267*0Sstevel@tonic-gate static void
pqueue_insert(stream_t * source,flag_t coll_flags)268*0Sstevel@tonic-gate pqueue_insert(stream_t *source, flag_t coll_flags)
269*0Sstevel@tonic-gate {
270*0Sstevel@tonic-gate pq_queue[++pq_N] = source;
271*0Sstevel@tonic-gate heap_up(pq_queue, pq_N, coll_flags);
272*0Sstevel@tonic-gate }
273*0Sstevel@tonic-gate
274*0Sstevel@tonic-gate static stream_t *
pqueue_head(flag_t coll_flags)275*0Sstevel@tonic-gate pqueue_head(flag_t coll_flags)
276*0Sstevel@tonic-gate {
277*0Sstevel@tonic-gate swap((void **)&pq_queue[1], (void **)&pq_queue[pq_N]);
278*0Sstevel@tonic-gate heap_down(pq_queue, 1, pq_N - 1, coll_flags);
279*0Sstevel@tonic-gate return (pq_queue[pq_N--]);
280*0Sstevel@tonic-gate }
281*0Sstevel@tonic-gate
282*0Sstevel@tonic-gate static void
merge_n_streams(sort_t * S,stream_t * head_streamp,int n_streams,stream_t * out_streamp,flag_t coll_flags)283*0Sstevel@tonic-gate merge_n_streams(sort_t *S, stream_t *head_streamp, int n_streams,
284*0Sstevel@tonic-gate stream_t *out_streamp, flag_t coll_flags)
285*0Sstevel@tonic-gate {
286*0Sstevel@tonic-gate stream_t *top_streamp;
287*0Sstevel@tonic-gate stream_t *cur_streamp;
288*0Sstevel@tonic-gate stream_t *bot_streamp;
289*0Sstevel@tonic-gate stream_t *loop_out_streamp;
290*0Sstevel@tonic-gate flag_t is_single_byte = S->m_single_byte_locale;
291*0Sstevel@tonic-gate
292*0Sstevel@tonic-gate int n_opens = 0;
293*0Sstevel@tonic-gate int threshold_opens;
294*0Sstevel@tonic-gate
295*0Sstevel@tonic-gate threshold_opens = MAX(4,
296*0Sstevel@tonic-gate 2 * S->m_memory_available / DEFAULT_RELEASE_SIZE);
297*0Sstevel@tonic-gate
298*0Sstevel@tonic-gate pqueue_init(n_streams, is_single_byte ? collated : collated_wide);
299*0Sstevel@tonic-gate
300*0Sstevel@tonic-gate top_streamp = bot_streamp = head_streamp;
301*0Sstevel@tonic-gate
302*0Sstevel@tonic-gate for (;;) {
303*0Sstevel@tonic-gate hold_file_descriptor();
304*0Sstevel@tonic-gate while (bot_streamp != NULL) {
305*0Sstevel@tonic-gate
306*0Sstevel@tonic-gate if (n_opens > threshold_opens ||
307*0Sstevel@tonic-gate stream_open_for_read(S, bot_streamp) == -1) {
308*0Sstevel@tonic-gate /*
309*0Sstevel@tonic-gate * Available file descriptors would exceed
310*0Sstevel@tonic-gate * memory target or have been exhausted; back
311*0Sstevel@tonic-gate * off to the last valid, primed stream.
312*0Sstevel@tonic-gate */
313*0Sstevel@tonic-gate bot_streamp = bot_streamp->s_previous;
314*0Sstevel@tonic-gate break;
315*0Sstevel@tonic-gate }
316*0Sstevel@tonic-gate
317*0Sstevel@tonic-gate if (bot_streamp->s_status & STREAM_SINGLE ||
318*0Sstevel@tonic-gate bot_streamp->s_status & STREAM_WIDE)
319*0Sstevel@tonic-gate stream_set(bot_streamp, STREAM_INSTANT);
320*0Sstevel@tonic-gate
321*0Sstevel@tonic-gate bot_streamp = bot_streamp->s_next;
322*0Sstevel@tonic-gate n_opens++;
323*0Sstevel@tonic-gate }
324*0Sstevel@tonic-gate release_file_descriptor();
325*0Sstevel@tonic-gate
326*0Sstevel@tonic-gate if (bot_streamp == NULL) {
327*0Sstevel@tonic-gate if (prepare_output_stream(out_streamp, S) != -1)
328*0Sstevel@tonic-gate loop_out_streamp = out_streamp;
329*0Sstevel@tonic-gate else
330*0Sstevel@tonic-gate die(EMSG_DESCRIPTORS);
331*0Sstevel@tonic-gate } else {
332*0Sstevel@tonic-gate loop_out_streamp = stream_push_to_temporary(
333*0Sstevel@tonic-gate &head_streamp, NULL, ST_OPEN | ST_NOCACHE |
334*0Sstevel@tonic-gate (is_single_byte ? 0 : ST_WIDE));
335*0Sstevel@tonic-gate
336*0Sstevel@tonic-gate if (loop_out_streamp == NULL ||
337*0Sstevel@tonic-gate top_streamp == bot_streamp)
338*0Sstevel@tonic-gate /*
339*0Sstevel@tonic-gate * We need three file descriptors to make
340*0Sstevel@tonic-gate * progress; if top_streamp == bot_streamp, then
341*0Sstevel@tonic-gate * we have only two.
342*0Sstevel@tonic-gate */
343*0Sstevel@tonic-gate die(EMSG_DESCRIPTORS);
344*0Sstevel@tonic-gate }
345*0Sstevel@tonic-gate
346*0Sstevel@tonic-gate for (cur_streamp = top_streamp; cur_streamp != bot_streamp;
347*0Sstevel@tonic-gate cur_streamp = cur_streamp->s_next) {
348*0Sstevel@tonic-gate /*
349*0Sstevel@tonic-gate * Empty stream?
350*0Sstevel@tonic-gate */
351*0Sstevel@tonic-gate if (!(cur_streamp->s_status & STREAM_ARRAY) &&
352*0Sstevel@tonic-gate SOP_EOS(cur_streamp)) {
353*0Sstevel@tonic-gate stream_unlink_temporary(cur_streamp);
354*0Sstevel@tonic-gate continue;
355*0Sstevel@tonic-gate }
356*0Sstevel@tonic-gate
357*0Sstevel@tonic-gate /*
358*0Sstevel@tonic-gate * Given that stream is not empty, any error in priming
359*0Sstevel@tonic-gate * must be fatal.
360*0Sstevel@tonic-gate */
361*0Sstevel@tonic-gate if (SOP_PRIME(cur_streamp) != PRIME_SUCCEEDED)
362*0Sstevel@tonic-gate die(EMSG_BADPRIME);
363*0Sstevel@tonic-gate
364*0Sstevel@tonic-gate cur_streamp->s_current.l_collate_bufsize =
365*0Sstevel@tonic-gate INITIAL_COLLATION_SIZE;
366*0Sstevel@tonic-gate cur_streamp->s_current.l_collate.sp =
367*0Sstevel@tonic-gate safe_realloc(NULL, INITIAL_COLLATION_SIZE);
368*0Sstevel@tonic-gate (void) mg_coll_convert(S->m_fields_head,
369*0Sstevel@tonic-gate &cur_streamp->s_current, FCV_REALLOC,
370*0Sstevel@tonic-gate S->m_field_separator);
371*0Sstevel@tonic-gate
372*0Sstevel@tonic-gate pqueue_insert(cur_streamp, coll_flags);
373*0Sstevel@tonic-gate }
374*0Sstevel@tonic-gate
375*0Sstevel@tonic-gate while (!pqueue_empty()) {
376*0Sstevel@tonic-gate cur_streamp = pqueue_head(coll_flags);
377*0Sstevel@tonic-gate
378*0Sstevel@tonic-gate SOP_PUT_LINE(loop_out_streamp, &cur_streamp->s_current);
379*0Sstevel@tonic-gate SOP_RELEASE_LINE(cur_streamp);
380*0Sstevel@tonic-gate
381*0Sstevel@tonic-gate if (!SOP_EOS(cur_streamp)) {
382*0Sstevel@tonic-gate SOP_FETCH(cur_streamp);
383*0Sstevel@tonic-gate (void) mg_coll_convert(S->m_fields_head,
384*0Sstevel@tonic-gate &cur_streamp->s_current, FCV_REALLOC,
385*0Sstevel@tonic-gate S->m_field_separator);
386*0Sstevel@tonic-gate pqueue_insert(cur_streamp, coll_flags);
387*0Sstevel@tonic-gate }
388*0Sstevel@tonic-gate }
389*0Sstevel@tonic-gate
390*0Sstevel@tonic-gate cur_streamp = top_streamp;
391*0Sstevel@tonic-gate while (cur_streamp != bot_streamp) {
392*0Sstevel@tonic-gate if (!(cur_streamp->s_status & STREAM_ARRAY))
393*0Sstevel@tonic-gate safe_free(cur_streamp->s_current.l_collate.sp);
394*0Sstevel@tonic-gate cur_streamp->s_current.l_collate.sp = NULL;
395*0Sstevel@tonic-gate
396*0Sstevel@tonic-gate (void) SOP_FREE(cur_streamp);
397*0Sstevel@tonic-gate stream_unlink_temporary(cur_streamp);
398*0Sstevel@tonic-gate (void) SOP_CLOSE(cur_streamp);
399*0Sstevel@tonic-gate
400*0Sstevel@tonic-gate cur_streamp = cur_streamp->s_next;
401*0Sstevel@tonic-gate }
402*0Sstevel@tonic-gate
403*0Sstevel@tonic-gate (void) SOP_FLUSH(loop_out_streamp);
404*0Sstevel@tonic-gate
405*0Sstevel@tonic-gate if (bot_streamp == NULL)
406*0Sstevel@tonic-gate break;
407*0Sstevel@tonic-gate
408*0Sstevel@tonic-gate if (!(loop_out_streamp->s_status & STREAM_NOTFILE)) {
409*0Sstevel@tonic-gate (void) SOP_CLOSE(loop_out_streamp);
410*0Sstevel@tonic-gate /*
411*0Sstevel@tonic-gate * Get file size so that we may treat intermediate files
412*0Sstevel@tonic-gate * with our stream_mmap facilities.
413*0Sstevel@tonic-gate */
414*0Sstevel@tonic-gate stream_stat_chain(loop_out_streamp);
415*0Sstevel@tonic-gate __S(stats_incr_merge_files());
416*0Sstevel@tonic-gate }
417*0Sstevel@tonic-gate
418*0Sstevel@tonic-gate n_opens = 0;
419*0Sstevel@tonic-gate
420*0Sstevel@tonic-gate top_streamp = bot_streamp;
421*0Sstevel@tonic-gate bot_streamp = bot_streamp->s_next;
422*0Sstevel@tonic-gate }
423*0Sstevel@tonic-gate }
424*0Sstevel@tonic-gate
425*0Sstevel@tonic-gate void
merge(sort_t * S)426*0Sstevel@tonic-gate merge(sort_t *S)
427*0Sstevel@tonic-gate {
428*0Sstevel@tonic-gate stream_t *merge_chain;
429*0Sstevel@tonic-gate stream_t *cur_streamp;
430*0Sstevel@tonic-gate stream_t out_stream;
431*0Sstevel@tonic-gate uint_t n_merges;
432*0Sstevel@tonic-gate flag_t coll_flags;
433*0Sstevel@tonic-gate
434*0Sstevel@tonic-gate if (S->m_merge_only) {
435*0Sstevel@tonic-gate merge_chain = S->m_input_streams;
436*0Sstevel@tonic-gate set_cleanup_chain(&S->m_input_streams);
437*0Sstevel@tonic-gate } else {
438*0Sstevel@tonic-gate /*
439*0Sstevel@tonic-gate * Otherwise we're inheriting the temporary output files from
440*0Sstevel@tonic-gate * our internal sort.
441*0Sstevel@tonic-gate */
442*0Sstevel@tonic-gate merge_chain = S->m_temporary_streams;
443*0Sstevel@tonic-gate stream_stat_chain(merge_chain);
444*0Sstevel@tonic-gate __S(stats_set_merge_files(stream_count_chain(merge_chain)));
445*0Sstevel@tonic-gate }
446*0Sstevel@tonic-gate
447*0Sstevel@tonic-gate if (S->m_field_options & FIELD_REVERSE_COMPARISONS)
448*0Sstevel@tonic-gate coll_flags = COLL_REVERSE;
449*0Sstevel@tonic-gate else
450*0Sstevel@tonic-gate coll_flags = 0;
451*0Sstevel@tonic-gate if (S->m_entire_line)
452*0Sstevel@tonic-gate coll_flags |= COLL_UNIQUE;
453*0Sstevel@tonic-gate
454*0Sstevel@tonic-gate n_merges = stream_count_chain(merge_chain);
455*0Sstevel@tonic-gate
456*0Sstevel@tonic-gate mg_coll_convert = S->m_coll_convert;
457*0Sstevel@tonic-gate cur_streamp = merge_chain;
458*0Sstevel@tonic-gate
459*0Sstevel@tonic-gate switch (n_merges) {
460*0Sstevel@tonic-gate case 0:
461*0Sstevel@tonic-gate /*
462*0Sstevel@tonic-gate * No files for merge.
463*0Sstevel@tonic-gate */
464*0Sstevel@tonic-gate warn(gettext("no files available to merge\n"));
465*0Sstevel@tonic-gate break;
466*0Sstevel@tonic-gate case 1:
467*0Sstevel@tonic-gate /*
468*0Sstevel@tonic-gate * Fast path: only one file for merge.
469*0Sstevel@tonic-gate */
470*0Sstevel@tonic-gate (void) stream_open_for_read(S, cur_streamp);
471*0Sstevel@tonic-gate (void) prepare_output_stream(&out_stream, S);
472*0Sstevel@tonic-gate merge_one_stream(S->m_fields_head, cur_streamp,
473*0Sstevel@tonic-gate &out_stream, S->m_field_separator);
474*0Sstevel@tonic-gate break;
475*0Sstevel@tonic-gate case 2:
476*0Sstevel@tonic-gate /*
477*0Sstevel@tonic-gate * Fast path: only two files for merge.
478*0Sstevel@tonic-gate */
479*0Sstevel@tonic-gate (void) stream_open_for_read(S, cur_streamp);
480*0Sstevel@tonic-gate (void) stream_open_for_read(S, cur_streamp->s_next);
481*0Sstevel@tonic-gate if (prepare_output_stream(&out_stream, S) == -1)
482*0Sstevel@tonic-gate die(EMSG_DESCRIPTORS);
483*0Sstevel@tonic-gate merge_two_streams(S->m_fields_head, cur_streamp,
484*0Sstevel@tonic-gate cur_streamp->s_next, &out_stream,
485*0Sstevel@tonic-gate S->m_field_separator, coll_flags);
486*0Sstevel@tonic-gate break;
487*0Sstevel@tonic-gate default:
488*0Sstevel@tonic-gate /*
489*0Sstevel@tonic-gate * Full merge.
490*0Sstevel@tonic-gate */
491*0Sstevel@tonic-gate merge_n_streams(S, cur_streamp, n_merges, &out_stream,
492*0Sstevel@tonic-gate coll_flags);
493*0Sstevel@tonic-gate break;
494*0Sstevel@tonic-gate }
495*0Sstevel@tonic-gate
496*0Sstevel@tonic-gate remove_output_guard();
497*0Sstevel@tonic-gate }
498