xref: /onnv-gate/usr/src/cmd/sort/common/streams_wide.c (revision 5836:a07f85025dc3)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*5836Snakanon  * Common Development and Distribution License (the "License").
6*5836Snakanon  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
22*5836Snakanon  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
280Sstevel@tonic-gate #include "streams_wide.h"
290Sstevel@tonic-gate #include "streams_common.h"
300Sstevel@tonic-gate 
/* Size, in bytes, of the buffer mapped on demand for STREAM_INSTANT streams. */
#define	WIDE_VBUF_SIZE	(64 * KILOBYTE)

/*
 * Shelf state.  The shelf is marked occupied when a fetch could only read
 * part of a line; the next fetch then grafts its input onto the held
 * fragment instead of starting a new line.
 */
#define	SHELF_VACANT	0
#define	SHELF_OCCUPIED	1
static int shelf = SHELF_VACANT;
360Sstevel@tonic-gate 
/*
 * Wide character streams implementation
 *
 *   The wide character streams implementation is, for the most part, a
 *   reimplementation of the stdio streams implementation, using wide character
 *   string routines.  However, fgetws(3C) retains the newline that fgets(3C)
 *   discards while reading a complete line.  As a result, the wide character
 *   routines need to guard against coincidental exhaustion of the buffer, and
 *   must also overwrite the end-of-line character and correct the
 *   l_data_length field.
 */
480Sstevel@tonic-gate 
490Sstevel@tonic-gate static int
stream_wide_prime(stream_t * str)500Sstevel@tonic-gate stream_wide_prime(stream_t *str)
510Sstevel@tonic-gate {
520Sstevel@tonic-gate 	stream_buffered_file_t *BF = &(str->s_type.BF);
530Sstevel@tonic-gate 	wchar_t *current_position;
540Sstevel@tonic-gate 	wchar_t *end_of_buffer;
550Sstevel@tonic-gate 	wchar_t *next_nl;
560Sstevel@tonic-gate 
570Sstevel@tonic-gate 	ASSERT(!(str->s_status & STREAM_OUTPUT));
580Sstevel@tonic-gate 	ASSERT(str->s_status & STREAM_OPEN);
590Sstevel@tonic-gate 
600Sstevel@tonic-gate 	if (str->s_status & STREAM_INSTANT && (str->s_buffer == NULL)) {
610Sstevel@tonic-gate 		str->s_buffer = xzmap(0, WIDE_VBUF_SIZE, PROT_READ |
620Sstevel@tonic-gate 		    PROT_WRITE, MAP_PRIVATE, 0);
630Sstevel@tonic-gate 		if (str->s_buffer == MAP_FAILED)
640Sstevel@tonic-gate 			die(EMSG_MMAP);
650Sstevel@tonic-gate 		str->s_buffer_size = WIDE_VBUF_SIZE;
660Sstevel@tonic-gate 	}
670Sstevel@tonic-gate 
680Sstevel@tonic-gate 	ASSERT(str->s_buffer != NULL);
690Sstevel@tonic-gate 
700Sstevel@tonic-gate 	if (stream_is_primed(str)) {
710Sstevel@tonic-gate 		int shelf_state = shelf;
720Sstevel@tonic-gate 
730Sstevel@tonic-gate 		ASSERT(str->s_current.l_data_length >= -1);
740Sstevel@tonic-gate 		(void) memcpy(str->s_buffer, str->s_current.l_data.wp,
750Sstevel@tonic-gate 		    (str->s_current.l_data_length + 1) * sizeof (wchar_t));
760Sstevel@tonic-gate 		str->s_current.l_data.wp = str->s_buffer;
770Sstevel@tonic-gate 
780Sstevel@tonic-gate 		if ((str->s_current.l_data_length == -1 ||
790Sstevel@tonic-gate 		    shelf_state == SHELF_OCCUPIED ||
800Sstevel@tonic-gate 		    *(str->s_current.l_data.wp +
810Sstevel@tonic-gate 		    str->s_current.l_data_length) != L'\0') &&
820Sstevel@tonic-gate 		    SOP_FETCH(str) == NEXT_LINE_INCOMPLETE &&
830Sstevel@tonic-gate 		    shelf_state == SHELF_OCCUPIED)
840Sstevel@tonic-gate 			die(EMSG_MEMORY);
850Sstevel@tonic-gate 
860Sstevel@tonic-gate 		return (PRIME_SUCCEEDED);
870Sstevel@tonic-gate 	}
880Sstevel@tonic-gate 
890Sstevel@tonic-gate 	stream_set(str, STREAM_PRIMED);
900Sstevel@tonic-gate 
910Sstevel@tonic-gate 	current_position = (wchar_t *)str->s_buffer;
920Sstevel@tonic-gate 	/*LINTED ALIGNMENT*/
930Sstevel@tonic-gate 	end_of_buffer = (wchar_t *)((char *)str->s_buffer +
940Sstevel@tonic-gate 	    str->s_buffer_size);
950Sstevel@tonic-gate 
960Sstevel@tonic-gate 	trip_eof(BF->s_fp);
970Sstevel@tonic-gate 	if (!feof(BF->s_fp))
980Sstevel@tonic-gate 		(void) fgetws(current_position, end_of_buffer
990Sstevel@tonic-gate 		    - current_position, BF->s_fp);
1000Sstevel@tonic-gate 	else {
1010Sstevel@tonic-gate 		stream_set(str, STREAM_EOS_REACHED);
1020Sstevel@tonic-gate 		stream_unset(str, STREAM_PRIMED);
1030Sstevel@tonic-gate 		return (PRIME_FAILED_EMPTY_FILE);
1040Sstevel@tonic-gate 	}
1050Sstevel@tonic-gate 
1060Sstevel@tonic-gate 	str->s_current.l_data.wp = current_position;
1070Sstevel@tonic-gate 	next_nl = xmemwchar(current_position, L'\n', end_of_buffer -
1080Sstevel@tonic-gate 	    current_position);
1090Sstevel@tonic-gate 	if (next_nl == NULL) {
1100Sstevel@tonic-gate 		warn(WMSG_NEWLINE_ADDED, str->s_filename);
1110Sstevel@tonic-gate 		str->s_current.l_data_length = MIN(wslen(current_position),
1120Sstevel@tonic-gate 		    end_of_buffer - current_position);
1130Sstevel@tonic-gate 	} else {
1140Sstevel@tonic-gate 		str->s_current.l_data_length = next_nl - current_position;
1150Sstevel@tonic-gate 	}
1160Sstevel@tonic-gate 	*(str->s_current.l_data.wp + str->s_current.l_data_length) = L'\0';
1170Sstevel@tonic-gate 
1180Sstevel@tonic-gate 	str->s_current.l_collate.wp = NULL;
1190Sstevel@tonic-gate 	str->s_current.l_collate_length = 0;
1200Sstevel@tonic-gate 
1210Sstevel@tonic-gate 	__S(stats_incr_fetches());
1220Sstevel@tonic-gate 	return (PRIME_SUCCEEDED);
1230Sstevel@tonic-gate }
1240Sstevel@tonic-gate 
1250Sstevel@tonic-gate static ssize_t
stream_wide_fetch(stream_t * str)1260Sstevel@tonic-gate stream_wide_fetch(stream_t *str)
1270Sstevel@tonic-gate {
1280Sstevel@tonic-gate 	ssize_t dist_to_buf_end;
1290Sstevel@tonic-gate 	int ret_val;
1300Sstevel@tonic-gate 	wchar_t *graft_pt;
1310Sstevel@tonic-gate 	wchar_t *next_nl;
1320Sstevel@tonic-gate 
1330Sstevel@tonic-gate 	ASSERT(str->s_status & STREAM_OPEN);
1340Sstevel@tonic-gate 	ASSERT((str->s_status & STREAM_EOS_REACHED) == 0);
1350Sstevel@tonic-gate 
1360Sstevel@tonic-gate 	graft_pt = str->s_current.l_data.wp + str->s_current.l_data_length + 1;
1370Sstevel@tonic-gate 
1380Sstevel@tonic-gate 	if (shelf == SHELF_VACANT)
1390Sstevel@tonic-gate 		str->s_current.l_data.wp = graft_pt;
1400Sstevel@tonic-gate 	else if (str->s_current.l_data_length > -1)
1410Sstevel@tonic-gate 		graft_pt--;
1420Sstevel@tonic-gate 
1430Sstevel@tonic-gate 	dist_to_buf_end = str->s_buffer_size / sizeof (wchar_t) - (graft_pt -
1440Sstevel@tonic-gate 	    (wchar_t *)str->s_buffer);
1450Sstevel@tonic-gate 
1460Sstevel@tonic-gate 	if (dist_to_buf_end <= 1) {
1470Sstevel@tonic-gate 		str->s_current.l_data_length = -1;
1480Sstevel@tonic-gate 		return (NEXT_LINE_INCOMPLETE);
1490Sstevel@tonic-gate 	}
1500Sstevel@tonic-gate 
1510Sstevel@tonic-gate 	if (fgetws(graft_pt, dist_to_buf_end, str->s_type.BF.s_fp) == NULL) {
1520Sstevel@tonic-gate 		if (feof(str->s_type.BF.s_fp))
1530Sstevel@tonic-gate 			stream_set(str, STREAM_EOS_REACHED);
1540Sstevel@tonic-gate 		else
1550Sstevel@tonic-gate 			die(EMSG_READ, str->s_filename);
1560Sstevel@tonic-gate 	}
1570Sstevel@tonic-gate 
1580Sstevel@tonic-gate 	trip_eof(str->s_type.BF.s_fp);
1590Sstevel@tonic-gate 	if ((next_nl = xmemwchar(str->s_current.l_data.wp, L'\n',
1600Sstevel@tonic-gate 	    dist_to_buf_end)) == NULL) {
1610Sstevel@tonic-gate 		str->s_current.l_data_length =
1620Sstevel@tonic-gate 		    MIN(wslen(str->s_current.l_data.wp), dist_to_buf_end);
1630Sstevel@tonic-gate 	} else {
1640Sstevel@tonic-gate 		str->s_current.l_data_length = next_nl -
1650Sstevel@tonic-gate 		    str->s_current.l_data.wp;
1660Sstevel@tonic-gate 	}
1670Sstevel@tonic-gate 
1680Sstevel@tonic-gate 	str->s_current.l_collate_length = 0;
1690Sstevel@tonic-gate 
1700Sstevel@tonic-gate 	if (*(str->s_current.l_data.wp + str->s_current.l_data_length) !=
1710Sstevel@tonic-gate 	    L'\n') {
1720Sstevel@tonic-gate 		if (!feof(str->s_type.BF.s_fp)) {
1730Sstevel@tonic-gate 			if (shelf == SHELF_OCCUPIED)
1740Sstevel@tonic-gate 				die(EMSG_MEMORY);
1750Sstevel@tonic-gate 
1760Sstevel@tonic-gate 			shelf = SHELF_OCCUPIED;
1770Sstevel@tonic-gate 			ret_val = NEXT_LINE_INCOMPLETE;
1780Sstevel@tonic-gate 			__S(stats_incr_shelves());
1790Sstevel@tonic-gate 		} else {
1800Sstevel@tonic-gate 			stream_set(str, STREAM_EOS_REACHED);
1810Sstevel@tonic-gate 			warn(WMSG_NEWLINE_ADDED, str->s_filename);
1820Sstevel@tonic-gate 		}
1830Sstevel@tonic-gate 	} else {
1840Sstevel@tonic-gate 		shelf = SHELF_VACANT;
1850Sstevel@tonic-gate 		ret_val = NEXT_LINE_COMPLETE;
1860Sstevel@tonic-gate 		*(str->s_current.l_data.wp + str->s_current.l_data_length) =
1870Sstevel@tonic-gate 		    L'\0';
1880Sstevel@tonic-gate 		__S(stats_incr_fetches());
1890Sstevel@tonic-gate 	}
1900Sstevel@tonic-gate 
1910Sstevel@tonic-gate 	return (ret_val);
1920Sstevel@tonic-gate }
1930Sstevel@tonic-gate 
1940Sstevel@tonic-gate ssize_t
stream_wide_fetch_overwrite(stream_t * str)1950Sstevel@tonic-gate stream_wide_fetch_overwrite(stream_t *str)
1960Sstevel@tonic-gate {
1970Sstevel@tonic-gate 	ssize_t dist_to_buf_end;
1980Sstevel@tonic-gate 
1990Sstevel@tonic-gate 	ASSERT(str->s_status & STREAM_OPEN);
2000Sstevel@tonic-gate 	ASSERT((str->s_status & STREAM_EOS_REACHED) == 0);
2010Sstevel@tonic-gate 
2020Sstevel@tonic-gate 	str->s_current.l_data.wp = str->s_buffer;
2030Sstevel@tonic-gate 	dist_to_buf_end = str->s_buffer_size / sizeof (wchar_t);
2040Sstevel@tonic-gate 
2050Sstevel@tonic-gate 	if (fgetws(str->s_current.l_data.wp, dist_to_buf_end,
2060Sstevel@tonic-gate 	    str->s_type.BF.s_fp) == NULL) {
2070Sstevel@tonic-gate 		if (feof(str->s_type.BF.s_fp))
2080Sstevel@tonic-gate 			stream_set(str, STREAM_EOS_REACHED);
2090Sstevel@tonic-gate 		else
2100Sstevel@tonic-gate 			die(EMSG_READ, str->s_filename);
2110Sstevel@tonic-gate 	}
2120Sstevel@tonic-gate 
2130Sstevel@tonic-gate 	trip_eof(str->s_type.BF.s_fp);
2140Sstevel@tonic-gate 	str->s_current.l_data_length = wslen(str->s_current.l_data.wp) - 1;
2150Sstevel@tonic-gate 	str->s_current.l_collate_length = 0;
2160Sstevel@tonic-gate 
2170Sstevel@tonic-gate 	if (str->s_current.l_data_length == -1 ||
2180Sstevel@tonic-gate 	    *(str->s_current.l_data.wp + str->s_current.l_data_length) !=
2190Sstevel@tonic-gate 	    L'\n') {
2200Sstevel@tonic-gate 		if (!feof(str->s_type.BF.s_fp)) {
2210Sstevel@tonic-gate 			die(EMSG_MEMORY);
2220Sstevel@tonic-gate 		} else {
2230Sstevel@tonic-gate 			stream_set(str, STREAM_EOS_REACHED);
2240Sstevel@tonic-gate 			warn(WMSG_NEWLINE_ADDED, str->s_filename);
2250Sstevel@tonic-gate 			str->s_current.l_data_length++;
2260Sstevel@tonic-gate 		}
2270Sstevel@tonic-gate 	}
2280Sstevel@tonic-gate 
2290Sstevel@tonic-gate 	*(str->s_current.l_data.wp + str->s_current.l_data_length) = L'\0';
2300Sstevel@tonic-gate 
2310Sstevel@tonic-gate 	__S(stats_incr_fetches());
2320Sstevel@tonic-gate 	return (NEXT_LINE_COMPLETE);
2330Sstevel@tonic-gate }
2340Sstevel@tonic-gate 
2350Sstevel@tonic-gate static void
stream_wide_send_eol(stream_t * str)2360Sstevel@tonic-gate stream_wide_send_eol(stream_t *str)
2370Sstevel@tonic-gate {
2380Sstevel@tonic-gate 	wchar_t w_crlf[2] = { L'\n', L'\0' };
2390Sstevel@tonic-gate 
2400Sstevel@tonic-gate 	ASSERT(str->s_status & STREAM_OPEN);
2410Sstevel@tonic-gate 	ASSERT(str->s_status & STREAM_OUTPUT);
2420Sstevel@tonic-gate 
2430Sstevel@tonic-gate 	if (wxwrite(str->s_type.SF.s_fd, w_crlf) < 0)
2440Sstevel@tonic-gate 		die(EMSG_WRITE, str->s_filename);
2450Sstevel@tonic-gate }
2460Sstevel@tonic-gate 
2470Sstevel@tonic-gate static void
stream_wide_put_line(stream_t * str,line_rec_t * line)2480Sstevel@tonic-gate stream_wide_put_line(stream_t *str, line_rec_t *line)
2490Sstevel@tonic-gate {
2500Sstevel@tonic-gate 	ASSERT(str->s_status & STREAM_OPEN);
2510Sstevel@tonic-gate 	ASSERT(str->s_status & STREAM_OUTPUT);
2520Sstevel@tonic-gate 
2530Sstevel@tonic-gate 	if (line->l_data_length >= 0) {
2540Sstevel@tonic-gate 		if (wxwrite(str->s_type.SF.s_fd, line->l_data.wp) >= 0) {
2550Sstevel@tonic-gate 			stream_wide_send_eol(str);
2560Sstevel@tonic-gate 			__S(stats_incr_puts());
2570Sstevel@tonic-gate 		} else
2580Sstevel@tonic-gate 			die(EMSG_WRITE, str->s_filename);
2590Sstevel@tonic-gate 	}
260*5836Snakanon 	safe_free(line->l_raw_collate.wp);
261*5836Snakanon 	line->l_raw_collate.wp = NULL;
2620Sstevel@tonic-gate }
2630Sstevel@tonic-gate 
2640Sstevel@tonic-gate void
stream_wide_put_line_unique(stream_t * str,line_rec_t * line)2650Sstevel@tonic-gate stream_wide_put_line_unique(stream_t *str, line_rec_t *line)
2660Sstevel@tonic-gate {
2670Sstevel@tonic-gate 	static line_rec_t pvs;
2680Sstevel@tonic-gate 	static size_t collate_buf_len;
2690Sstevel@tonic-gate 
2700Sstevel@tonic-gate 	ASSERT(str->s_status & STREAM_OPEN);
2710Sstevel@tonic-gate 	ASSERT(str->s_status & STREAM_OUTPUT);
2720Sstevel@tonic-gate 
2730Sstevel@tonic-gate 	if ((pvs.l_collate.sp == NULL ||
2740Sstevel@tonic-gate 	    collated_wide(&pvs, line, 0, COLL_UNIQUE) != 0) &&
2750Sstevel@tonic-gate 	    line->l_data_length >= 0) {
2760Sstevel@tonic-gate 		stream_wide_put_line(str, line);
2770Sstevel@tonic-gate 
2780Sstevel@tonic-gate 		if (line->l_collate_length + sizeof (wchar_t) >
2790Sstevel@tonic-gate 		    collate_buf_len) {
2800Sstevel@tonic-gate 			pvs.l_collate.sp = safe_realloc(pvs.l_collate.sp,
2810Sstevel@tonic-gate 			    line->l_collate_length + sizeof (wchar_t));
2820Sstevel@tonic-gate 			collate_buf_len = line->l_collate_length +
2830Sstevel@tonic-gate 			    sizeof (wchar_t);
2840Sstevel@tonic-gate 		}
2850Sstevel@tonic-gate 
2860Sstevel@tonic-gate 		(void) memcpy(pvs.l_collate.sp, line->l_collate.sp,
2870Sstevel@tonic-gate 		    line->l_collate_length);
2880Sstevel@tonic-gate 		/* LINTED ALIGNMENT */
2890Sstevel@tonic-gate 		*(wchar_t *)(pvs.l_collate.sp + line->l_collate_length) = L'\0';
2900Sstevel@tonic-gate 		pvs.l_collate_length = line->l_collate_length;
2910Sstevel@tonic-gate 	}
2920Sstevel@tonic-gate }
2930Sstevel@tonic-gate 
2940Sstevel@tonic-gate static int
stream_wide_eos(stream_t * str)2950Sstevel@tonic-gate stream_wide_eos(stream_t *str)
2960Sstevel@tonic-gate {
2970Sstevel@tonic-gate 	int retval = 0;
2980Sstevel@tonic-gate 
2990Sstevel@tonic-gate 	if (str == NULL || str->s_status & STREAM_EOS_REACHED)
3000Sstevel@tonic-gate 		return (1);
3010Sstevel@tonic-gate 
3020Sstevel@tonic-gate 	trip_eof(str->s_type.BF.s_fp);
3030Sstevel@tonic-gate 	if (feof(str->s_type.BF.s_fp) &&
3040Sstevel@tonic-gate 	    shelf == SHELF_VACANT &&
3050Sstevel@tonic-gate 	    str->s_current.l_collate_length != -1) {
3060Sstevel@tonic-gate 		retval = 1;
3070Sstevel@tonic-gate 		stream_set(str, STREAM_EOS_REACHED);
3080Sstevel@tonic-gate 	}
3090Sstevel@tonic-gate 
3100Sstevel@tonic-gate 	return (retval);
3110Sstevel@tonic-gate }
3120Sstevel@tonic-gate 
3130Sstevel@tonic-gate /*ARGSUSED*/
3140Sstevel@tonic-gate static void
stream_wide_release_line(stream_t * str)3150Sstevel@tonic-gate stream_wide_release_line(stream_t *str)
3160Sstevel@tonic-gate {
3170Sstevel@tonic-gate }
3180Sstevel@tonic-gate 
3190Sstevel@tonic-gate const stream_ops_t stream_wide_ops = {
3200Sstevel@tonic-gate 	stream_stdio_is_closable,
3210Sstevel@tonic-gate 	stream_stdio_close,
3220Sstevel@tonic-gate 	stream_wide_eos,
3230Sstevel@tonic-gate 	stream_wide_fetch,
3240Sstevel@tonic-gate 	stream_stdio_flush,
3250Sstevel@tonic-gate 	stream_stdio_free,
3260Sstevel@tonic-gate 	stream_stdio_open_for_write,
3270Sstevel@tonic-gate 	stream_wide_prime,
3280Sstevel@tonic-gate 	stream_wide_put_line,
3290Sstevel@tonic-gate 	stream_wide_release_line,
3300Sstevel@tonic-gate 	stream_wide_send_eol,
3310Sstevel@tonic-gate 	stream_stdio_unlink
3320Sstevel@tonic-gate };
333