/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 1998-2003 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include "check.h"

/*
 * Prefixes of the diagnostics emitted by fail_check().  The DEBUG variants
 * contain a %llu conversion, which fail_check() fills in with the line
 * counter it is given; the non-DEBUG variants contain no conversions.
 */
#ifndef DEBUG
#define	MSG_DISORDER		gettext("sort: disorder: ")
#define	MSG_NONUNIQUE		gettext("sort: non-unique: ")
#else /* DEBUG */
#define	MSG_DISORDER		gettext("sort: disorder (%llu): ")
#define	MSG_NONUNIQUE		gettext("sort: non-unique (%llu): ")
#endif /* DEBUG */

/*
 * Bits for the flags argument of fail_check().  CHECK_FAILURE_DISORDER
 * selects the "disorder" message, otherwise the "non-unique" message is
 * used; CHECK_WIDE marks the line data as wide characters that must be
 * converted to multibyte form before being written.
 */
#define	CHECK_FAILURE_DISORDER	0x1
#define	CHECK_FAILURE_NONUNIQUE	0x2
#define	CHECK_WIDE		0x4

43*0Sstevel@tonic-gate static void
fail_check(line_rec_t * L,int flags,u_longlong_t lineno)44*0Sstevel@tonic-gate fail_check(line_rec_t *L, int flags, u_longlong_t lineno)
45*0Sstevel@tonic-gate {
46*0Sstevel@tonic-gate 	char *line;
47*0Sstevel@tonic-gate 	ssize_t length;
48*0Sstevel@tonic-gate 
49*0Sstevel@tonic-gate 	if (flags & CHECK_WIDE) {
50*0Sstevel@tonic-gate 		if ((length = (ssize_t)wcstombs(NULL, L->l_data.wp, 0)) < 0)
51*0Sstevel@tonic-gate 			die(EMSG_ILLEGAL_CHAR);
52*0Sstevel@tonic-gate 
53*0Sstevel@tonic-gate 		/*
54*0Sstevel@tonic-gate 		 * +1 for null character
55*0Sstevel@tonic-gate 		 */
56*0Sstevel@tonic-gate 		line = alloca(length + 1);
57*0Sstevel@tonic-gate 		(void) wcstombs(line, L->l_data.wp, L->l_data_length);
58*0Sstevel@tonic-gate 		line[length] = '\0';
59*0Sstevel@tonic-gate 	} else {
60*0Sstevel@tonic-gate 		line = L->l_data.sp;
61*0Sstevel@tonic-gate 		length = L->l_data_length;
62*0Sstevel@tonic-gate 	}
63*0Sstevel@tonic-gate 
64*0Sstevel@tonic-gate 	if (flags & CHECK_FAILURE_DISORDER) {
65*0Sstevel@tonic-gate 		(void) fprintf(stderr, MSG_DISORDER, lineno);
66*0Sstevel@tonic-gate 		(void) write(fileno(stderr), line, length);
67*0Sstevel@tonic-gate 		(void) fprintf(stderr, "\n");
68*0Sstevel@tonic-gate 		return;
69*0Sstevel@tonic-gate 	}
70*0Sstevel@tonic-gate 
71*0Sstevel@tonic-gate 	(void) fprintf(stderr, MSG_NONUNIQUE);
72*0Sstevel@tonic-gate 	(void) write(fileno(stderr), line, length);
73*0Sstevel@tonic-gate 	(void) fprintf(stderr, "\n");
74*0Sstevel@tonic-gate }
75*0Sstevel@tonic-gate 
76*0Sstevel@tonic-gate static void
swap_coll_bufs(line_rec_t * A,line_rec_t * B)77*0Sstevel@tonic-gate swap_coll_bufs(line_rec_t *A, line_rec_t *B)
78*0Sstevel@tonic-gate {
79*0Sstevel@tonic-gate 	char *coll_buffer = B->l_collate.sp;
80*0Sstevel@tonic-gate 	ssize_t coll_bufsize = B->l_collate_bufsize;
81*0Sstevel@tonic-gate 
82*0Sstevel@tonic-gate 	safe_free(B->l_raw_collate.sp);
83*0Sstevel@tonic-gate 	copy_line_rec(A, B);
84*0Sstevel@tonic-gate 
85*0Sstevel@tonic-gate 	A->l_collate.sp = coll_buffer;
86*0Sstevel@tonic-gate 	A->l_collate_bufsize = coll_bufsize;
87*0Sstevel@tonic-gate 	A->l_raw_collate.sp = NULL;
88*0Sstevel@tonic-gate }
89*0Sstevel@tonic-gate 
90*0Sstevel@tonic-gate /*
91*0Sstevel@tonic-gate  * check_if_sorted() interacts with a stream in a slightly different way than a
92*0Sstevel@tonic-gate  * simple sort or a merge operation:  the check involves looking at two adjacent
93*0Sstevel@tonic-gate  * lines of the file and verifying that they are collated according to the key
94*0Sstevel@tonic-gate  * specifiers given.  For files accessed via mmap(), this is simply done as the
95*0Sstevel@tonic-gate  * entirety of the file is present in the address space.  For files accessed via
96*0Sstevel@tonic-gate  * stdio, regardless of locale, we must be able to guarantee that two lines are
97*0Sstevel@tonic-gate  * present in memory at once.  The basic buffer code for stdio does not make
98*0Sstevel@tonic-gate  * such a guarantee, so we use stream_swap_buffer() to alternate between two
99*0Sstevel@tonic-gate  * input buffers.
100*0Sstevel@tonic-gate  */
101*0Sstevel@tonic-gate void
check_if_sorted(sort_t * S)102*0Sstevel@tonic-gate check_if_sorted(sort_t *S)
103*0Sstevel@tonic-gate {
104*0Sstevel@tonic-gate 	size_t input_mem;
105*0Sstevel@tonic-gate 	int numerator, denominator;
106*0Sstevel@tonic-gate 
107*0Sstevel@tonic-gate 	char *data_buffer = NULL;
108*0Sstevel@tonic-gate 	size_t data_bufsize = 0;
109*0Sstevel@tonic-gate 	line_rec_t last_line;
110*0Sstevel@tonic-gate 	u_longlong_t lineno = 0;
111*0Sstevel@tonic-gate 	int r;
112*0Sstevel@tonic-gate 	int swap_required;
113*0Sstevel@tonic-gate 	flag_t coll_flags;
114*0Sstevel@tonic-gate 	stream_t *cur_streamp = S->m_input_streams;
115*0Sstevel@tonic-gate 
116*0Sstevel@tonic-gate 	ssize_t (*conversion_fcn)(field_t *, line_rec_t *, flag_t, vchar_t) =
117*0Sstevel@tonic-gate 	    field_convert;
118*0Sstevel@tonic-gate 	int (*collation_fcn)(line_rec_t *, line_rec_t *, ssize_t, flag_t) =
119*0Sstevel@tonic-gate 	    collated;
120*0Sstevel@tonic-gate 
121*0Sstevel@tonic-gate 	set_memory_ratio(S, &numerator, &denominator);
122*0Sstevel@tonic-gate 
123*0Sstevel@tonic-gate 	if (stream_open_for_read(S, cur_streamp) > 1)
124*0Sstevel@tonic-gate 		die(EMSG_CHECK);
125*0Sstevel@tonic-gate 
126*0Sstevel@tonic-gate 	if (SOP_EOS(cur_streamp))
127*0Sstevel@tonic-gate 		exit(E_SUCCESS);
128*0Sstevel@tonic-gate 
129*0Sstevel@tonic-gate 	(void) memset(&last_line, 0, sizeof (line_rec_t));
130*0Sstevel@tonic-gate 
131*0Sstevel@tonic-gate 	/*
132*0Sstevel@tonic-gate 	 * We need to swap data buffers for the stream with each fetch, except
133*0Sstevel@tonic-gate 	 * on STREAM_MMAP (which are implicitly STREAM_SUSTAIN).
134*0Sstevel@tonic-gate 	 */
135*0Sstevel@tonic-gate 	swap_required = !(cur_streamp->s_status & STREAM_MMAP);
136*0Sstevel@tonic-gate 	if (swap_required) {
137*0Sstevel@tonic-gate 		stream_set(cur_streamp, STREAM_INSTANT);
138*0Sstevel@tonic-gate 		/*
139*0Sstevel@tonic-gate 		 * We use one half of the available memory for input, half for
140*0Sstevel@tonic-gate 		 * each buffer.  (The other half is left unreserved, in case
141*0Sstevel@tonic-gate 		 * conversions to collatable form require it.)
142*0Sstevel@tonic-gate 		 */
143*0Sstevel@tonic-gate 		input_mem = numerator * S->m_memory_available / denominator / 4;
144*0Sstevel@tonic-gate 
145*0Sstevel@tonic-gate 		stream_set_size(cur_streamp, input_mem);
146*0Sstevel@tonic-gate 		stream_swap_buffer(cur_streamp, &data_buffer, &data_bufsize);
147*0Sstevel@tonic-gate 		stream_set_size(cur_streamp, input_mem);
148*0Sstevel@tonic-gate 
149*0Sstevel@tonic-gate 		if (cur_streamp->s_status & STREAM_WIDE) {
150*0Sstevel@tonic-gate 			conversion_fcn = field_convert_wide;
151*0Sstevel@tonic-gate 			collation_fcn = collated_wide;
152*0Sstevel@tonic-gate 		}
153*0Sstevel@tonic-gate 	}
154*0Sstevel@tonic-gate 
155*0Sstevel@tonic-gate 	if (SOP_PRIME(cur_streamp) > 1)
156*0Sstevel@tonic-gate 		die(EMSG_CHECK);
157*0Sstevel@tonic-gate 
158*0Sstevel@tonic-gate 	if (S->m_field_options & FIELD_REVERSE_COMPARISONS)
159*0Sstevel@tonic-gate 		coll_flags = COLL_REVERSE;
160*0Sstevel@tonic-gate 	else
161*0Sstevel@tonic-gate 		coll_flags = 0;
162*0Sstevel@tonic-gate 	if (S->m_unique_lines)
163*0Sstevel@tonic-gate 		coll_flags |= COLL_UNIQUE;
164*0Sstevel@tonic-gate 
165*0Sstevel@tonic-gate 	cur_streamp->s_current.l_collate_bufsize = INITIAL_COLLATION_SIZE
166*0Sstevel@tonic-gate 	    * cur_streamp->s_element_size;
167*0Sstevel@tonic-gate 	cur_streamp->s_current.l_collate.sp = safe_realloc(NULL,
168*0Sstevel@tonic-gate 	    cur_streamp->s_current.l_collate_bufsize);
169*0Sstevel@tonic-gate 	cur_streamp->s_current.l_raw_collate.sp = NULL;
170*0Sstevel@tonic-gate 
171*0Sstevel@tonic-gate 	last_line.l_collate_bufsize = INITIAL_COLLATION_SIZE *
172*0Sstevel@tonic-gate 	    cur_streamp->s_element_size;
173*0Sstevel@tonic-gate 	last_line.l_collate.sp = safe_realloc(NULL,
174*0Sstevel@tonic-gate 	    last_line.l_collate_bufsize);
175*0Sstevel@tonic-gate 	last_line.l_raw_collate.sp = NULL;
176*0Sstevel@tonic-gate 
177*0Sstevel@tonic-gate 	(void) conversion_fcn(S->m_fields_head, &cur_streamp->s_current,
178*0Sstevel@tonic-gate 	    FCV_REALLOC, S->m_field_separator);
179*0Sstevel@tonic-gate 
180*0Sstevel@tonic-gate 	swap_coll_bufs(&cur_streamp->s_current, &last_line);
181*0Sstevel@tonic-gate 	if (swap_required)
182*0Sstevel@tonic-gate 		stream_swap_buffer(cur_streamp, &data_buffer, &data_bufsize);
183*0Sstevel@tonic-gate 
184*0Sstevel@tonic-gate 	while (!SOP_EOS(cur_streamp)) {
185*0Sstevel@tonic-gate 		(void) SOP_FETCH(cur_streamp);
186*0Sstevel@tonic-gate 		lineno++;
187*0Sstevel@tonic-gate 
188*0Sstevel@tonic-gate 		(void) conversion_fcn(S->m_fields_head, &cur_streamp->s_current,
189*0Sstevel@tonic-gate 		    FCV_REALLOC, S->m_field_separator);
190*0Sstevel@tonic-gate 
191*0Sstevel@tonic-gate 		r = collation_fcn(&last_line, &cur_streamp->s_current, 0,
192*0Sstevel@tonic-gate 		    coll_flags);
193*0Sstevel@tonic-gate 
194*0Sstevel@tonic-gate 		if (r < 0 || (r == 0 && S->m_unique_lines == 0)) {
195*0Sstevel@tonic-gate 			swap_coll_bufs(&cur_streamp->s_current, &last_line);
196*0Sstevel@tonic-gate 			if (swap_required)
197*0Sstevel@tonic-gate 				stream_swap_buffer(cur_streamp, &data_buffer,
198*0Sstevel@tonic-gate 				    &data_bufsize);
199*0Sstevel@tonic-gate 			continue;
200*0Sstevel@tonic-gate 		}
201*0Sstevel@tonic-gate 
202*0Sstevel@tonic-gate 		if (r > 0) {
203*0Sstevel@tonic-gate #ifndef	XPG4
204*0Sstevel@tonic-gate 			fail_check(&cur_streamp->s_current,
205*0Sstevel@tonic-gate 			    CHECK_FAILURE_DISORDER |
206*0Sstevel@tonic-gate 			    (S->m_single_byte_locale ? 0 : CHECK_WIDE),
207*0Sstevel@tonic-gate 			    lineno);
208*0Sstevel@tonic-gate #endif /* XPG4 */
209*0Sstevel@tonic-gate 			exit(E_FAILED_CHECK);
210*0Sstevel@tonic-gate 		}
211*0Sstevel@tonic-gate 
212*0Sstevel@tonic-gate 		if (r == 0 && S->m_unique_lines != 0) {
213*0Sstevel@tonic-gate #ifndef	XPG4
214*0Sstevel@tonic-gate 			fail_check(&cur_streamp->s_current,
215*0Sstevel@tonic-gate 			    CHECK_FAILURE_NONUNIQUE |
216*0Sstevel@tonic-gate 			    (S->m_single_byte_locale ? 0 : CHECK_WIDE),
217*0Sstevel@tonic-gate 			    lineno);
218*0Sstevel@tonic-gate #endif /* XPG4 */
219*0Sstevel@tonic-gate 			exit(E_FAILED_CHECK);
220*0Sstevel@tonic-gate 		}
221*0Sstevel@tonic-gate 	}
222*0Sstevel@tonic-gate 
223*0Sstevel@tonic-gate 	exit(E_SUCCESS);
224*0Sstevel@tonic-gate 	/*NOTREACHED*/
225*0Sstevel@tonic-gate }
226