/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 1998-2003 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
26*101e15b5SRichard Lowe 
27*101e15b5SRichard Lowe #include "check.h"
28*101e15b5SRichard Lowe 
/*
 * Prefixes printed ahead of the offending line when the check fails.  The
 * DEBUG variants carry a %llu conversion for the line number; the regular
 * variants carry no conversion at all.
 */
#ifdef DEBUG
#define	MSG_DISORDER		gettext("sort: disorder (%llu): ")
#define	MSG_NONUNIQUE		gettext("sort: non-unique (%llu): ")
#else /* !DEBUG */
#define	MSG_DISORDER		gettext("sort: disorder: ")
#define	MSG_NONUNIQUE		gettext("sort: non-unique: ")
#endif /* DEBUG */

/*
 * Flag bits accepted by fail_check():  the kind of failure being reported,
 * and whether the line data must be converted from wide characters first.
 */
#define	CHECK_FAILURE_DISORDER	0x1
#define	CHECK_FAILURE_NONUNIQUE	0x2
#define	CHECK_WIDE		0x4
40*101e15b5SRichard Lowe 
41*101e15b5SRichard Lowe static void
fail_check(line_rec_t * L,int flags,u_longlong_t lineno)42*101e15b5SRichard Lowe fail_check(line_rec_t *L, int flags, u_longlong_t lineno)
43*101e15b5SRichard Lowe {
44*101e15b5SRichard Lowe 	char *line;
45*101e15b5SRichard Lowe 	ssize_t length;
46*101e15b5SRichard Lowe 
47*101e15b5SRichard Lowe 	if (flags & CHECK_WIDE) {
48*101e15b5SRichard Lowe 		if ((length = (ssize_t)wcstombs(NULL, L->l_data.wp, 0)) < 0)
49*101e15b5SRichard Lowe 			die(EMSG_ILLEGAL_CHAR);
50*101e15b5SRichard Lowe 
51*101e15b5SRichard Lowe 		/*
52*101e15b5SRichard Lowe 		 * +1 for null character
53*101e15b5SRichard Lowe 		 */
54*101e15b5SRichard Lowe 		line = alloca(length + 1);
55*101e15b5SRichard Lowe 		(void) wcstombs(line, L->l_data.wp, L->l_data_length);
56*101e15b5SRichard Lowe 		line[length] = '\0';
57*101e15b5SRichard Lowe 	} else {
58*101e15b5SRichard Lowe 		line = L->l_data.sp;
59*101e15b5SRichard Lowe 		length = L->l_data_length;
60*101e15b5SRichard Lowe 	}
61*101e15b5SRichard Lowe 
62*101e15b5SRichard Lowe 	if (flags & CHECK_FAILURE_DISORDER) {
63*101e15b5SRichard Lowe 		(void) fprintf(stderr, MSG_DISORDER, lineno);
64*101e15b5SRichard Lowe 		(void) write(fileno(stderr), line, length);
65*101e15b5SRichard Lowe 		(void) fprintf(stderr, "\n");
66*101e15b5SRichard Lowe 		return;
67*101e15b5SRichard Lowe 	}
68*101e15b5SRichard Lowe 
69*101e15b5SRichard Lowe 	(void) fprintf(stderr, MSG_NONUNIQUE);
70*101e15b5SRichard Lowe 	(void) write(fileno(stderr), line, length);
71*101e15b5SRichard Lowe 	(void) fprintf(stderr, "\n");
72*101e15b5SRichard Lowe }
73*101e15b5SRichard Lowe 
74*101e15b5SRichard Lowe static void
swap_coll_bufs(line_rec_t * A,line_rec_t * B)75*101e15b5SRichard Lowe swap_coll_bufs(line_rec_t *A, line_rec_t *B)
76*101e15b5SRichard Lowe {
77*101e15b5SRichard Lowe 	char *coll_buffer = B->l_collate.sp;
78*101e15b5SRichard Lowe 	ssize_t coll_bufsize = B->l_collate_bufsize;
79*101e15b5SRichard Lowe 
80*101e15b5SRichard Lowe 	safe_free(B->l_raw_collate.sp);
81*101e15b5SRichard Lowe 	copy_line_rec(A, B);
82*101e15b5SRichard Lowe 
83*101e15b5SRichard Lowe 	A->l_collate.sp = coll_buffer;
84*101e15b5SRichard Lowe 	A->l_collate_bufsize = coll_bufsize;
85*101e15b5SRichard Lowe 	A->l_raw_collate.sp = NULL;
86*101e15b5SRichard Lowe }
87*101e15b5SRichard Lowe 
88*101e15b5SRichard Lowe /*
89*101e15b5SRichard Lowe  * check_if_sorted() interacts with a stream in a slightly different way than a
90*101e15b5SRichard Lowe  * simple sort or a merge operation:  the check involves looking at two adjacent
91*101e15b5SRichard Lowe  * lines of the file and verifying that they are collated according to the key
92*101e15b5SRichard Lowe  * specifiers given.  For files accessed via mmap(), this is simply done as the
93*101e15b5SRichard Lowe  * entirety of the file is present in the address space.  For files accessed via
94*101e15b5SRichard Lowe  * stdio, regardless of locale, we must be able to guarantee that two lines are
95*101e15b5SRichard Lowe  * present in memory at once.  The basic buffer code for stdio does not make
96*101e15b5SRichard Lowe  * such a guarantee, so we use stream_swap_buffer() to alternate between two
97*101e15b5SRichard Lowe  * input buffers.
98*101e15b5SRichard Lowe  */
99*101e15b5SRichard Lowe void
check_if_sorted(sort_t * S)100*101e15b5SRichard Lowe check_if_sorted(sort_t *S)
101*101e15b5SRichard Lowe {
102*101e15b5SRichard Lowe 	size_t input_mem;
103*101e15b5SRichard Lowe 	int numerator, denominator;
104*101e15b5SRichard Lowe 
105*101e15b5SRichard Lowe 	char *data_buffer = NULL;
106*101e15b5SRichard Lowe 	size_t data_bufsize = 0;
107*101e15b5SRichard Lowe 	line_rec_t last_line;
108*101e15b5SRichard Lowe 	u_longlong_t lineno = 0;
109*101e15b5SRichard Lowe 	int r;
110*101e15b5SRichard Lowe 	int swap_required;
111*101e15b5SRichard Lowe 	flag_t coll_flags;
112*101e15b5SRichard Lowe 	stream_t *cur_streamp = S->m_input_streams;
113*101e15b5SRichard Lowe 
114*101e15b5SRichard Lowe 	ssize_t (*conversion_fcn)(field_t *, line_rec_t *, flag_t, vchar_t) =
115*101e15b5SRichard Lowe 	    field_convert;
116*101e15b5SRichard Lowe 	int (*collation_fcn)(line_rec_t *, line_rec_t *, ssize_t, flag_t) =
117*101e15b5SRichard Lowe 	    collated;
118*101e15b5SRichard Lowe 
119*101e15b5SRichard Lowe 	set_memory_ratio(S, &numerator, &denominator);
120*101e15b5SRichard Lowe 
121*101e15b5SRichard Lowe 	if (stream_open_for_read(S, cur_streamp) > 1)
122*101e15b5SRichard Lowe 		die(EMSG_CHECK);
123*101e15b5SRichard Lowe 
124*101e15b5SRichard Lowe 	if (SOP_EOS(cur_streamp))
125*101e15b5SRichard Lowe 		exit(E_SUCCESS);
126*101e15b5SRichard Lowe 
127*101e15b5SRichard Lowe 	(void) memset(&last_line, 0, sizeof (line_rec_t));
128*101e15b5SRichard Lowe 
129*101e15b5SRichard Lowe 	/*
130*101e15b5SRichard Lowe 	 * We need to swap data buffers for the stream with each fetch, except
131*101e15b5SRichard Lowe 	 * on STREAM_MMAP (which are implicitly STREAM_SUSTAIN).
132*101e15b5SRichard Lowe 	 */
133*101e15b5SRichard Lowe 	swap_required = !(cur_streamp->s_status & STREAM_MMAP);
134*101e15b5SRichard Lowe 	if (swap_required) {
135*101e15b5SRichard Lowe 		stream_set(cur_streamp, STREAM_INSTANT);
136*101e15b5SRichard Lowe 		/*
137*101e15b5SRichard Lowe 		 * We use one half of the available memory for input, half for
138*101e15b5SRichard Lowe 		 * each buffer.  (The other half is left unreserved, in case
139*101e15b5SRichard Lowe 		 * conversions to collatable form require it.)
140*101e15b5SRichard Lowe 		 */
141*101e15b5SRichard Lowe 		input_mem = numerator * S->m_memory_available / denominator / 4;
142*101e15b5SRichard Lowe 
143*101e15b5SRichard Lowe 		stream_set_size(cur_streamp, input_mem);
144*101e15b5SRichard Lowe 		stream_swap_buffer(cur_streamp, &data_buffer, &data_bufsize);
145*101e15b5SRichard Lowe 		stream_set_size(cur_streamp, input_mem);
146*101e15b5SRichard Lowe 
147*101e15b5SRichard Lowe 		if (cur_streamp->s_status & STREAM_WIDE) {
148*101e15b5SRichard Lowe 			conversion_fcn = field_convert_wide;
149*101e15b5SRichard Lowe 			collation_fcn = collated_wide;
150*101e15b5SRichard Lowe 		}
151*101e15b5SRichard Lowe 	}
152*101e15b5SRichard Lowe 
153*101e15b5SRichard Lowe 	if (SOP_PRIME(cur_streamp) > 1)
154*101e15b5SRichard Lowe 		die(EMSG_CHECK);
155*101e15b5SRichard Lowe 
156*101e15b5SRichard Lowe 	if (S->m_field_options & FIELD_REVERSE_COMPARISONS)
157*101e15b5SRichard Lowe 		coll_flags = COLL_REVERSE;
158*101e15b5SRichard Lowe 	else
159*101e15b5SRichard Lowe 		coll_flags = 0;
160*101e15b5SRichard Lowe 	if (S->m_unique_lines)
161*101e15b5SRichard Lowe 		coll_flags |= COLL_UNIQUE;
162*101e15b5SRichard Lowe 
163*101e15b5SRichard Lowe 	cur_streamp->s_current.l_collate_bufsize = INITIAL_COLLATION_SIZE
164*101e15b5SRichard Lowe 	    * cur_streamp->s_element_size;
165*101e15b5SRichard Lowe 	cur_streamp->s_current.l_collate.sp = safe_realloc(NULL,
166*101e15b5SRichard Lowe 	    cur_streamp->s_current.l_collate_bufsize);
167*101e15b5SRichard Lowe 	cur_streamp->s_current.l_raw_collate.sp = NULL;
168*101e15b5SRichard Lowe 
169*101e15b5SRichard Lowe 	last_line.l_collate_bufsize = INITIAL_COLLATION_SIZE *
170*101e15b5SRichard Lowe 	    cur_streamp->s_element_size;
171*101e15b5SRichard Lowe 	last_line.l_collate.sp = safe_realloc(NULL,
172*101e15b5SRichard Lowe 	    last_line.l_collate_bufsize);
173*101e15b5SRichard Lowe 	last_line.l_raw_collate.sp = NULL;
174*101e15b5SRichard Lowe 
175*101e15b5SRichard Lowe 	(void) conversion_fcn(S->m_fields_head, &cur_streamp->s_current,
176*101e15b5SRichard Lowe 	    FCV_REALLOC, S->m_field_separator);
177*101e15b5SRichard Lowe 
178*101e15b5SRichard Lowe 	swap_coll_bufs(&cur_streamp->s_current, &last_line);
179*101e15b5SRichard Lowe 	if (swap_required)
180*101e15b5SRichard Lowe 		stream_swap_buffer(cur_streamp, &data_buffer, &data_bufsize);
181*101e15b5SRichard Lowe 
182*101e15b5SRichard Lowe 	while (!SOP_EOS(cur_streamp)) {
183*101e15b5SRichard Lowe 		(void) SOP_FETCH(cur_streamp);
184*101e15b5SRichard Lowe 		lineno++;
185*101e15b5SRichard Lowe 
186*101e15b5SRichard Lowe 		(void) conversion_fcn(S->m_fields_head, &cur_streamp->s_current,
187*101e15b5SRichard Lowe 		    FCV_REALLOC, S->m_field_separator);
188*101e15b5SRichard Lowe 
189*101e15b5SRichard Lowe 		r = collation_fcn(&last_line, &cur_streamp->s_current, 0,
190*101e15b5SRichard Lowe 		    coll_flags);
191*101e15b5SRichard Lowe 
192*101e15b5SRichard Lowe 		if (r < 0 || (r == 0 && S->m_unique_lines == 0)) {
193*101e15b5SRichard Lowe 			swap_coll_bufs(&cur_streamp->s_current, &last_line);
194*101e15b5SRichard Lowe 			if (swap_required)
195*101e15b5SRichard Lowe 				stream_swap_buffer(cur_streamp, &data_buffer,
196*101e15b5SRichard Lowe 				    &data_bufsize);
197*101e15b5SRichard Lowe 			continue;
198*101e15b5SRichard Lowe 		}
199*101e15b5SRichard Lowe 
200*101e15b5SRichard Lowe 		if (r > 0) {
201*101e15b5SRichard Lowe #ifndef	XPG4
202*101e15b5SRichard Lowe 			fail_check(&cur_streamp->s_current,
203*101e15b5SRichard Lowe 			    CHECK_FAILURE_DISORDER |
204*101e15b5SRichard Lowe 			    (S->m_single_byte_locale ? 0 : CHECK_WIDE),
205*101e15b5SRichard Lowe 			    lineno);
206*101e15b5SRichard Lowe #endif /* XPG4 */
207*101e15b5SRichard Lowe 			exit(E_FAILED_CHECK);
208*101e15b5SRichard Lowe 		}
209*101e15b5SRichard Lowe 
210*101e15b5SRichard Lowe 		if (r == 0 && S->m_unique_lines != 0) {
211*101e15b5SRichard Lowe #ifndef	XPG4
212*101e15b5SRichard Lowe 			fail_check(&cur_streamp->s_current,
213*101e15b5SRichard Lowe 			    CHECK_FAILURE_NONUNIQUE |
214*101e15b5SRichard Lowe 			    (S->m_single_byte_locale ? 0 : CHECK_WIDE),
215*101e15b5SRichard Lowe 			    lineno);
216*101e15b5SRichard Lowe #endif /* XPG4 */
217*101e15b5SRichard Lowe 			exit(E_FAILED_CHECK);
218*101e15b5SRichard Lowe 		}
219*101e15b5SRichard Lowe 	}
220*101e15b5SRichard Lowe 
221*101e15b5SRichard Lowe 	exit(E_SUCCESS);
222*101e15b5SRichard Lowe 	/*NOTREACHED*/
223*101e15b5SRichard Lowe }
224