xref: /onnv-gate/usr/src/cmd/sort/common/options.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
#include "options.h"

#include <limits.h>
30*0Sstevel@tonic-gate 
31*0Sstevel@tonic-gate /*
32*0Sstevel@tonic-gate  * options
33*0Sstevel@tonic-gate  *
34*0Sstevel@tonic-gate  * Overview
35*0Sstevel@tonic-gate  *   sort(1) supports two methods for specifying the sort key:  the original,
36*0Sstevel@tonic-gate  *   now-obsolete, +n -m form and the POSIX -k n,m form.  We refer to the former
37*0Sstevel@tonic-gate  *   as "old specifiers" and the latter as "new specifiers".  The options()
38*0Sstevel@tonic-gate  *   function parses the command line arguments given to sort, placing the sort
39*0Sstevel@tonic-gate  *   key specifiers in the internal representation used in fields.c.
40*0Sstevel@tonic-gate  *
41*0Sstevel@tonic-gate  * Equivalence of specifiers
42*0Sstevel@tonic-gate  *   One of sort(1)'s standard peculiarities is the transformation of the
43*0Sstevel@tonic-gate  *   character offsets and field numbering between the new and old style field
44*0Sstevel@tonic-gate  *   specifications.  We simply quote from the Single Unix standard:
45*0Sstevel@tonic-gate  *
46*0Sstevel@tonic-gate  *	+w.xT -y.zU
47*0Sstevel@tonic-gate  *
48*0Sstevel@tonic-gate  *   is equivalent to
49*0Sstevel@tonic-gate  *
50*0Sstevel@tonic-gate  * 	undefined		when z == 0, U contains b, and -t is set
51*0Sstevel@tonic-gate  * 	-k w+1.x+1T,y.0U	when z == 0 otherwise
52*0Sstevel@tonic-gate  * 	-k w+1.x+1T,y+1.zU	when z > 0
53*0Sstevel@tonic-gate  *
54*0Sstevel@tonic-gate  *   Undoubtedly, this seemed logical at the time.  (Using only the field head
55*0Sstevel@tonic-gate  *   as the coordinate, as done in the obsolete version, seems much simpler.)
56*0Sstevel@tonic-gate  *   The reverse map is where the key specifier
57*0Sstevel@tonic-gate  *
58*0Sstevel@tonic-gate  *	-k w.xT,y.zU
59*0Sstevel@tonic-gate  *
60*0Sstevel@tonic-gate  *   is equivalent to
61*0Sstevel@tonic-gate  *
62*0Sstevel@tonic-gate  * 	undefined		when z == 0, U contains b, and -t is set
 *	+w-1.x-1T -y.0U		when z == 0 otherwise
 *	+w-1.x-1T -y-1.zU	when z > 0
65*0Sstevel@tonic-gate  *
66*0Sstevel@tonic-gate  *   in the obsolete syntax.  Because the original key specifiers lead to a
67*0Sstevel@tonic-gate  *   simpler implementation, the internal representation of a field in this
68*0Sstevel@tonic-gate  *   implementation of sort is mostly that given by the obsolete syntax.
69*0Sstevel@tonic-gate  */
70*0Sstevel@tonic-gate 
/*
 * Option strings handed to getopt().  OPTIONS_STRING is used for ordinary
 * command line parsing.  OLD_SPEC_OPTIONS_STRING is used while an obsolete
 * +m ... -n key specifier is still open (that is, its closing -n has not yet
 * been seen); it accepts the same letters but leaves out the digit options.
 */
#define	OPTIONS_STRING		"cmuo:T:z:dfiMnrbt:k:S:0123456789"
#define	OLD_SPEC_OPTIONS_STRING	"bdfiMnrcmuo:T:z:t:k:S:"

/*
 * Flag bits accepted by parse_field_spec().
 */
#define	OPTIONS_OLDSPEC		(1 << 0)	/* else new-style spec */
#define	OPTIONS_STARTSPEC	(1 << 1)	/* else end spec */
81*0Sstevel@tonic-gate 
/*
 * is_number() reports whether the string C consists solely of decimal digits.
 * It returns 1 if every character of C is a digit and 0 as soon as a
 * non-digit is found.  The empty string contains no non-digit characters and
 * therefore yields 1.
 *
 * The string is walked once up to its terminating NUL, rather than calling
 * strlen() again on every iteration.
 */
static int
is_number(char *C)
{
	char *p;

	for (p = C; *p != '\0'; p++) {
		/* <ctype.h> functions require an unsigned char value. */
		if (!isdigit((unsigned char)*p))
			return (0);
	}

	return (1);
}
93*0Sstevel@tonic-gate 
/*
 * A key given through the -k option or the +n syntax that carries any
 * modifier does not inherit the current global field modifiers.
 *
 * field_spec_has_modifiers() returns 0 when the leading run of C made up only
 * of digits, periods and commas is exactly length characters long, and 1
 * otherwise.
 */
static int
field_spec_has_modifiers(char *C, int length)
{
	static const char plain_spec_chars[] = ",.1234567890";
	int plain_prefix = strspn(C, plain_spec_chars);

	return (plain_prefix != length);
}
108*0Sstevel@tonic-gate 
109*0Sstevel@tonic-gate static void
field_apply_all(field_t * fc,flag_t flags)110*0Sstevel@tonic-gate field_apply_all(field_t *fc, flag_t flags)
111*0Sstevel@tonic-gate {
112*0Sstevel@tonic-gate 	field_t *f;
113*0Sstevel@tonic-gate 
114*0Sstevel@tonic-gate 	for (f = fc; f; f = f->f_next)
115*0Sstevel@tonic-gate 		if ((f->f_options & FIELD_MODIFIERS_DEFINED) == 0)
116*0Sstevel@tonic-gate 			f->f_options |= flags;
117*0Sstevel@tonic-gate }
118*0Sstevel@tonic-gate 
119*0Sstevel@tonic-gate static int
parse_field_spec(field_t * F,char * C,int flags,int length)120*0Sstevel@tonic-gate parse_field_spec(field_t *F, char *C, int flags, int length)
121*0Sstevel@tonic-gate {
122*0Sstevel@tonic-gate 	int p_period = MIN(length, strcspn(C, "."));
123*0Sstevel@tonic-gate 	int p_modifiers = MIN(length, strspn(C, ".1234567890"));
124*0Sstevel@tonic-gate 	int p_boundary = MIN(p_period, p_modifiers);
125*0Sstevel@tonic-gate 	int field = 0;
126*0Sstevel@tonic-gate 	int offset = 0;
127*0Sstevel@tonic-gate 	int offset_seen = 0;
128*0Sstevel@tonic-gate 	int i;
129*0Sstevel@tonic-gate 	int blanks_flag = 0;
130*0Sstevel@tonic-gate 
131*0Sstevel@tonic-gate 	for (i = 0; i < p_boundary; i++) {
132*0Sstevel@tonic-gate 		if (isdigit((uchar_t)C[i]))
133*0Sstevel@tonic-gate 			field = (10 * field) + (C[i] - '0');
134*0Sstevel@tonic-gate 		else
135*0Sstevel@tonic-gate 			return (1);
136*0Sstevel@tonic-gate 	}
137*0Sstevel@tonic-gate 
138*0Sstevel@tonic-gate 	if (p_period < p_modifiers) {
139*0Sstevel@tonic-gate 		for (i = p_period + 1; i < p_modifiers; i++) {
140*0Sstevel@tonic-gate 			if (isdigit((uchar_t)C[i])) {
141*0Sstevel@tonic-gate 				offset_seen++;
142*0Sstevel@tonic-gate 				offset = (10 * offset) + (C[i] - '0');
143*0Sstevel@tonic-gate 			} else {
144*0Sstevel@tonic-gate 				return (1);
145*0Sstevel@tonic-gate 			}
146*0Sstevel@tonic-gate 		}
147*0Sstevel@tonic-gate 	}
148*0Sstevel@tonic-gate 
149*0Sstevel@tonic-gate 	if (p_modifiers < length) {
150*0Sstevel@tonic-gate 		for (i = p_modifiers; i < length; i++) {
151*0Sstevel@tonic-gate 			switch (C[i]) {
152*0Sstevel@tonic-gate 				case 'b':
153*0Sstevel@tonic-gate 					blanks_flag = 1;
154*0Sstevel@tonic-gate 					break;
155*0Sstevel@tonic-gate 				case 'd':
156*0Sstevel@tonic-gate 					F->f_options |= FIELD_DICTIONARY_ORDER;
157*0Sstevel@tonic-gate 					break;
158*0Sstevel@tonic-gate 				case 'f':
159*0Sstevel@tonic-gate 					F->f_options |= FIELD_FOLD_UPPERCASE;
160*0Sstevel@tonic-gate 					break;
161*0Sstevel@tonic-gate 				case 'i':
162*0Sstevel@tonic-gate 					F->f_options |=
163*0Sstevel@tonic-gate 					    FIELD_IGNORE_NONPRINTABLES;
164*0Sstevel@tonic-gate 					break;
165*0Sstevel@tonic-gate 				case 'M':
166*0Sstevel@tonic-gate 					F->f_species = MONTH;
167*0Sstevel@tonic-gate 					break;
168*0Sstevel@tonic-gate 				case 'n':
169*0Sstevel@tonic-gate 					F->f_species = NUMERIC;
170*0Sstevel@tonic-gate 					break;
171*0Sstevel@tonic-gate 				case 'r':
172*0Sstevel@tonic-gate 					F->f_options |=
173*0Sstevel@tonic-gate 					    FIELD_REVERSE_COMPARISONS;
174*0Sstevel@tonic-gate 					break;
175*0Sstevel@tonic-gate 				default:
176*0Sstevel@tonic-gate 					usage();
177*0Sstevel@tonic-gate 					break;
178*0Sstevel@tonic-gate 			}
179*0Sstevel@tonic-gate 		}
180*0Sstevel@tonic-gate 	}
181*0Sstevel@tonic-gate 
182*0Sstevel@tonic-gate 	if (flags & OPTIONS_STARTSPEC) {
183*0Sstevel@tonic-gate 		F->f_start_field = field;
184*0Sstevel@tonic-gate 		F->f_start_offset = offset;
185*0Sstevel@tonic-gate 		if ((flags & OPTIONS_OLDSPEC) != OPTIONS_OLDSPEC) {
186*0Sstevel@tonic-gate 			F->f_start_field--;
187*0Sstevel@tonic-gate 			if (offset_seen)
188*0Sstevel@tonic-gate 				F->f_start_offset--;
189*0Sstevel@tonic-gate 		}
190*0Sstevel@tonic-gate 		F->f_options |= blanks_flag ? FIELD_IGNORE_BLANKS_START : 0;
191*0Sstevel@tonic-gate 	} else {
192*0Sstevel@tonic-gate 		F->f_end_field = field;
193*0Sstevel@tonic-gate 		F->f_end_offset = offset;
194*0Sstevel@tonic-gate 		if ((flags & OPTIONS_OLDSPEC) != OPTIONS_OLDSPEC &&
195*0Sstevel@tonic-gate 		    offset_seen && offset != 0)
196*0Sstevel@tonic-gate 			F->f_end_field--;
197*0Sstevel@tonic-gate 		F->f_options |= blanks_flag ? FIELD_IGNORE_BLANKS_END : 0;
198*0Sstevel@tonic-gate 	}
199*0Sstevel@tonic-gate 
200*0Sstevel@tonic-gate 	return (0);
201*0Sstevel@tonic-gate }
202*0Sstevel@tonic-gate 
203*0Sstevel@tonic-gate static void
parse_new_field_spec(sort_t * S,char * arg)204*0Sstevel@tonic-gate parse_new_field_spec(sort_t *S, char *arg)
205*0Sstevel@tonic-gate {
206*0Sstevel@tonic-gate 	int length = strlen(arg);
207*0Sstevel@tonic-gate 	int p_comma = MIN(length, strcspn(arg, ","));
208*0Sstevel@tonic-gate 	field_t *nF;
209*0Sstevel@tonic-gate 	int p;
210*0Sstevel@tonic-gate 
211*0Sstevel@tonic-gate 	/*
212*0Sstevel@tonic-gate 	 * New field specifiers do not inherit from the general specifier if
213*0Sstevel@tonic-gate 	 * they have any modifiers set.  (This is specifically tested in the VSC
214*0Sstevel@tonic-gate 	 * test suite, assertion 32 for POSIX.cmd/sort.)
215*0Sstevel@tonic-gate 	 */
216*0Sstevel@tonic-gate 	if (field_spec_has_modifiers(arg, length)) {
217*0Sstevel@tonic-gate 		nF = field_new(NULL);
218*0Sstevel@tonic-gate 		nF->f_options = FIELD_MODIFIERS_DEFINED;
219*0Sstevel@tonic-gate 	} else {
220*0Sstevel@tonic-gate 		nF = field_new(S);
221*0Sstevel@tonic-gate 	}
222*0Sstevel@tonic-gate 	p = parse_field_spec(nF, arg, OPTIONS_STARTSPEC, p_comma);
223*0Sstevel@tonic-gate 
224*0Sstevel@tonic-gate 	if (p != 0)
225*0Sstevel@tonic-gate 		usage();
226*0Sstevel@tonic-gate 
227*0Sstevel@tonic-gate 	if (p_comma < length) {
228*0Sstevel@tonic-gate 		p = parse_field_spec(nF, &(arg[p_comma + 1]), 0,
229*0Sstevel@tonic-gate 		    strlen(&(arg[p_comma + 1])));
230*0Sstevel@tonic-gate 		if (p != 0)
231*0Sstevel@tonic-gate 			usage();
232*0Sstevel@tonic-gate 	}
233*0Sstevel@tonic-gate 
234*0Sstevel@tonic-gate 	if (nF->f_start_field < 0 || nF->f_start_offset < 0) {
235*0Sstevel@tonic-gate 		if (S->m_verbose)
236*0Sstevel@tonic-gate 			warn("-k %s is not a supported field specifier\n", arg);
237*0Sstevel@tonic-gate 	}
238*0Sstevel@tonic-gate 	nF->f_start_field = MAX(nF->f_start_field, 0);
239*0Sstevel@tonic-gate 	nF->f_start_offset = MAX(nF->f_start_offset, 0);
240*0Sstevel@tonic-gate 
241*0Sstevel@tonic-gate 	/*
242*0Sstevel@tonic-gate 	 * If the starting field exceeds a defined ending field, convention
243*0Sstevel@tonic-gate 	 * dictates that the field is ignored.
244*0Sstevel@tonic-gate 	 */
245*0Sstevel@tonic-gate 	if (nF->f_end_field == -1 || nF->f_start_field < nF->f_end_field ||
246*0Sstevel@tonic-gate 	    (nF->f_start_field == nF->f_end_field &&
247*0Sstevel@tonic-gate 	    nF->f_start_offset < nF->f_end_offset)) {
248*0Sstevel@tonic-gate 		field_add_to_chain(&(S->m_fields_head), nF);
249*0Sstevel@tonic-gate 	} else if (S->m_verbose) {
250*0Sstevel@tonic-gate 		warn("illegal field -k %s omitted", arg);
251*0Sstevel@tonic-gate 	}
252*0Sstevel@tonic-gate }
253*0Sstevel@tonic-gate 
254*0Sstevel@tonic-gate /*
255*0Sstevel@tonic-gate  * parse_old_field_spec() is getopt()-aware; it may modify the values of optind,
256*0Sstevel@tonic-gate  * optarg, and so forth, to correctly determine the characteristics being
257*0Sstevel@tonic-gate  * assigned to the current field.
258*0Sstevel@tonic-gate  */
259*0Sstevel@tonic-gate static int
parse_old_field_spec(sort_t * S,int argc,char * argv[])260*0Sstevel@tonic-gate parse_old_field_spec(sort_t *S, int argc, char *argv[])
261*0Sstevel@tonic-gate {
262*0Sstevel@tonic-gate 	field_t *nF;
263*0Sstevel@tonic-gate 	int c, p;
264*0Sstevel@tonic-gate 	char *arg = argv[optind];
265*0Sstevel@tonic-gate 
266*0Sstevel@tonic-gate 	if (field_spec_has_modifiers(arg + 1, strlen(arg + 1))) {
267*0Sstevel@tonic-gate 		nF = field_new(NULL);
268*0Sstevel@tonic-gate 		nF->f_options = FIELD_MODIFIERS_DEFINED;
269*0Sstevel@tonic-gate 	} else {
270*0Sstevel@tonic-gate 		nF = field_new(S);
271*0Sstevel@tonic-gate 	}
272*0Sstevel@tonic-gate 
273*0Sstevel@tonic-gate 	p = parse_field_spec(nF, arg + 1, OPTIONS_OLDSPEC | OPTIONS_STARTSPEC,
274*0Sstevel@tonic-gate 	    strlen(arg + 1));
275*0Sstevel@tonic-gate 
276*0Sstevel@tonic-gate 	if (p != 0) {
277*0Sstevel@tonic-gate 		field_delete(nF);
278*0Sstevel@tonic-gate 		return (0);
279*0Sstevel@tonic-gate 	}
280*0Sstevel@tonic-gate 
281*0Sstevel@tonic-gate 	/*
282*0Sstevel@tonic-gate 	 * In the case that getopt() returns '?' (unrecognized option) or EOF
283*0Sstevel@tonic-gate 	 * (non-option argument), the field is considered closed.
284*0Sstevel@tonic-gate 	 */
285*0Sstevel@tonic-gate 	for (arg = argv[++optind]; optind < argc; arg = argv[optind]) {
286*0Sstevel@tonic-gate 		if (strlen(arg) >= 2 && *arg == '-' &&
287*0Sstevel@tonic-gate 		    isdigit(*(uchar_t *)(arg + 1))) {
288*0Sstevel@tonic-gate 			(void) parse_field_spec(nF, arg + 1,
289*0Sstevel@tonic-gate 			    OPTIONS_OLDSPEC, strlen(arg) - 1);
290*0Sstevel@tonic-gate 			field_add_to_chain(&(S->m_fields_head), nF);
291*0Sstevel@tonic-gate 			optind++;
292*0Sstevel@tonic-gate 			return (1);
293*0Sstevel@tonic-gate 		}
294*0Sstevel@tonic-gate 
295*0Sstevel@tonic-gate 		if ((c = getopt(argc, argv, OLD_SPEC_OPTIONS_STRING)) != EOF) {
296*0Sstevel@tonic-gate 			switch (c) {
297*0Sstevel@tonic-gate 			case 'b':
298*0Sstevel@tonic-gate 				nF->f_options |= FIELD_IGNORE_BLANKS_START;
299*0Sstevel@tonic-gate 				break;
300*0Sstevel@tonic-gate 			case 'd':
301*0Sstevel@tonic-gate 				nF->f_options |= FIELD_DICTIONARY_ORDER;
302*0Sstevel@tonic-gate 				break;
303*0Sstevel@tonic-gate 			case 'f':
304*0Sstevel@tonic-gate 				nF->f_options |= FIELD_FOLD_UPPERCASE;
305*0Sstevel@tonic-gate 				break;
306*0Sstevel@tonic-gate 			case 'i':
307*0Sstevel@tonic-gate 				nF->f_options |= FIELD_IGNORE_NONPRINTABLES;
308*0Sstevel@tonic-gate 				break;
309*0Sstevel@tonic-gate 			case 'M':
310*0Sstevel@tonic-gate 				nF->f_species = MONTH;
311*0Sstevel@tonic-gate 				break;
312*0Sstevel@tonic-gate 			case 'n':
313*0Sstevel@tonic-gate 				nF->f_species = NUMERIC;
314*0Sstevel@tonic-gate 				break;
315*0Sstevel@tonic-gate 			case 'r':
316*0Sstevel@tonic-gate 				nF->f_options |= FIELD_REVERSE_COMPARISONS;
317*0Sstevel@tonic-gate 				break;
318*0Sstevel@tonic-gate 			case '?':
319*0Sstevel@tonic-gate 			case 'c':
320*0Sstevel@tonic-gate 			case 'm':
321*0Sstevel@tonic-gate 			case 'u':
322*0Sstevel@tonic-gate 				/*
323*0Sstevel@tonic-gate 				 * Options without arguments.
324*0Sstevel@tonic-gate 				 */
325*0Sstevel@tonic-gate 				optind -= 1;
326*0Sstevel@tonic-gate 				field_add_to_chain(&(S->m_fields_head), nF);
327*0Sstevel@tonic-gate 				return (1);
328*0Sstevel@tonic-gate 				/*NOTREACHED*/
329*0Sstevel@tonic-gate 			case 'o':
330*0Sstevel@tonic-gate 			case 'T':
331*0Sstevel@tonic-gate 			case 'z':
332*0Sstevel@tonic-gate 			case 't':
333*0Sstevel@tonic-gate 			case 'k':
334*0Sstevel@tonic-gate 			case 'S':
335*0Sstevel@tonic-gate 				/*
336*0Sstevel@tonic-gate 				 * Options with arguments.
337*0Sstevel@tonic-gate 				 */
338*0Sstevel@tonic-gate 				if (optarg == argv[optind - 1] + 2) {
339*0Sstevel@tonic-gate 					optind -= 1;
340*0Sstevel@tonic-gate 				} else {
341*0Sstevel@tonic-gate 					optind -= 2;
342*0Sstevel@tonic-gate 				}
343*0Sstevel@tonic-gate 				field_add_to_chain(&(S->m_fields_head), nF);
344*0Sstevel@tonic-gate 				return (1);
345*0Sstevel@tonic-gate 				/*NOTREACHED*/
346*0Sstevel@tonic-gate 			default:
347*0Sstevel@tonic-gate 				die(EMSG_UNKN_OPTION);
348*0Sstevel@tonic-gate 				/*NOTREACHED*/
349*0Sstevel@tonic-gate 			}
350*0Sstevel@tonic-gate 		} else {
351*0Sstevel@tonic-gate 			break;
352*0Sstevel@tonic-gate 		}
353*0Sstevel@tonic-gate 	}
354*0Sstevel@tonic-gate 
355*0Sstevel@tonic-gate 	field_add_to_chain(&(S->m_fields_head), nF);
356*0Sstevel@tonic-gate 	return (1);
357*0Sstevel@tonic-gate }
358*0Sstevel@tonic-gate 
359*0Sstevel@tonic-gate int
options(sort_t * S,int argc,char * argv[])360*0Sstevel@tonic-gate options(sort_t *S, int argc, char *argv[])
361*0Sstevel@tonic-gate {
362*0Sstevel@tonic-gate 	int c;
363*0Sstevel@tonic-gate 
364*0Sstevel@tonic-gate 	optind = 1;
365*0Sstevel@tonic-gate 	while (optind < argc) {
366*0Sstevel@tonic-gate 		if (strncmp("-y", argv[optind], strlen("-y")) == 0) {
367*0Sstevel@tonic-gate 			/*
368*0Sstevel@tonic-gate 			 * The -y [kmem] option violates the standard syntax
369*0Sstevel@tonic-gate 			 * outlined in intro(1).  we have to be a little fancy
370*0Sstevel@tonic-gate 			 * to determine if the next argument is a valid integer.
371*0Sstevel@tonic-gate 			 * (note, of course, that the previous sort(1) had no
372*0Sstevel@tonic-gate 			 * mechanism to resolve a final
373*0Sstevel@tonic-gate 			 *	-y 99999
374*0Sstevel@tonic-gate 			 * into
375*0Sstevel@tonic-gate 			 *	-y, file 99999
376*0Sstevel@tonic-gate 			 * or
377*0Sstevel@tonic-gate 			 *	-y 99999, file stdin
378*0Sstevel@tonic-gate 			 *
379*0Sstevel@tonic-gate 			 * Now one can unambiguously use
380*0Sstevel@tonic-gate 			 *	-y -- 99999
381*0Sstevel@tonic-gate 			 * and
382*0Sstevel@tonic-gate 			 *	-y 99999 -
383*0Sstevel@tonic-gate 			 * to distinguish these cases.
384*0Sstevel@tonic-gate 			 *
385*0Sstevel@tonic-gate 			 * That said, we do not use the information passed using
386*0Sstevel@tonic-gate 			 * -y option in sort(1); we provide the argument to
387*0Sstevel@tonic-gate 			 * preserve compatibility for existing scripts.
388*0Sstevel@tonic-gate 			 */
389*0Sstevel@tonic-gate 			if (strlen(argv[optind]) == strlen("-y") &&
390*0Sstevel@tonic-gate 			    optind + 1 < argc &&
391*0Sstevel@tonic-gate 			    is_number(argv[optind + 1]))
392*0Sstevel@tonic-gate 				optind += 2;
393*0Sstevel@tonic-gate 			else
394*0Sstevel@tonic-gate 				optind += 1;
395*0Sstevel@tonic-gate 		}
396*0Sstevel@tonic-gate 
397*0Sstevel@tonic-gate 		if ((c = getopt(argc, argv, OPTIONS_STRING)) != EOF) {
398*0Sstevel@tonic-gate 			switch (c) {
399*0Sstevel@tonic-gate 			case 'c':
400*0Sstevel@tonic-gate 				S->m_check_if_sorted_only = 1;
401*0Sstevel@tonic-gate 				break;
402*0Sstevel@tonic-gate 
403*0Sstevel@tonic-gate 			case 'm':
404*0Sstevel@tonic-gate 				S->m_merge_only = 1;
405*0Sstevel@tonic-gate 				break;
406*0Sstevel@tonic-gate 
407*0Sstevel@tonic-gate 			case 'u':
408*0Sstevel@tonic-gate 				S->m_unique_lines = 1;
409*0Sstevel@tonic-gate 				break;
410*0Sstevel@tonic-gate 
411*0Sstevel@tonic-gate 			case 'o':
412*0Sstevel@tonic-gate 				S->m_output_filename = optarg;
413*0Sstevel@tonic-gate 				break;
414*0Sstevel@tonic-gate 
415*0Sstevel@tonic-gate 			case 'T':
416*0Sstevel@tonic-gate 				S->m_tmpdir_template = optarg;
417*0Sstevel@tonic-gate 				break;
418*0Sstevel@tonic-gate 
419*0Sstevel@tonic-gate 			case 'z':
420*0Sstevel@tonic-gate 				/*
421*0Sstevel@tonic-gate 				 * ignore optarg -- obsolete
422*0Sstevel@tonic-gate 				 */
423*0Sstevel@tonic-gate 				break;
424*0Sstevel@tonic-gate 
425*0Sstevel@tonic-gate 			case 'd':
426*0Sstevel@tonic-gate 				S->m_field_options |= FIELD_DICTIONARY_ORDER;
427*0Sstevel@tonic-gate 				field_apply_all(S->m_fields_head,
428*0Sstevel@tonic-gate 				    FIELD_DICTIONARY_ORDER);
429*0Sstevel@tonic-gate 				break;
430*0Sstevel@tonic-gate 
431*0Sstevel@tonic-gate 			case 'f':
432*0Sstevel@tonic-gate 				S->m_field_options |= FIELD_FOLD_UPPERCASE;
433*0Sstevel@tonic-gate 				field_apply_all(S->m_fields_head,
434*0Sstevel@tonic-gate 				    FIELD_FOLD_UPPERCASE);
435*0Sstevel@tonic-gate 				break;
436*0Sstevel@tonic-gate 
437*0Sstevel@tonic-gate 			case 'i':
438*0Sstevel@tonic-gate 				S->m_field_options |=
439*0Sstevel@tonic-gate 				    FIELD_IGNORE_NONPRINTABLES;
440*0Sstevel@tonic-gate 				field_apply_all(S->m_fields_head,
441*0Sstevel@tonic-gate 				    FIELD_IGNORE_NONPRINTABLES);
442*0Sstevel@tonic-gate 				break;
443*0Sstevel@tonic-gate 
444*0Sstevel@tonic-gate 			case 'M':
445*0Sstevel@tonic-gate 				S->m_default_species = MONTH;
446*0Sstevel@tonic-gate 				S->m_field_options &=
447*0Sstevel@tonic-gate 				    ~FIELD_IGNORE_BLANKS_START;
448*0Sstevel@tonic-gate 				break;
449*0Sstevel@tonic-gate 
450*0Sstevel@tonic-gate 			case 'n':
451*0Sstevel@tonic-gate 				S->m_default_species = NUMERIC;
452*0Sstevel@tonic-gate 				{
453*0Sstevel@tonic-gate 					field_t *f;
454*0Sstevel@tonic-gate 
455*0Sstevel@tonic-gate 					for (f = S->m_fields_head; f;
456*0Sstevel@tonic-gate 					    f = f->f_next)
457*0Sstevel@tonic-gate 						if ((f->f_options &
458*0Sstevel@tonic-gate 						    FIELD_MODIFIERS_DEFINED) ==
459*0Sstevel@tonic-gate 						    0)
460*0Sstevel@tonic-gate 							f->f_species = NUMERIC;
461*0Sstevel@tonic-gate 				}
462*0Sstevel@tonic-gate 				break;
463*0Sstevel@tonic-gate 
464*0Sstevel@tonic-gate 			case 'b':
465*0Sstevel@tonic-gate 				S->m_field_options |=
466*0Sstevel@tonic-gate 				    FIELD_IGNORE_BLANKS_START |
467*0Sstevel@tonic-gate 				    FIELD_IGNORE_BLANKS_END;
468*0Sstevel@tonic-gate 				break;
469*0Sstevel@tonic-gate 
470*0Sstevel@tonic-gate 			case 'r':
471*0Sstevel@tonic-gate 				S->m_field_options |=
472*0Sstevel@tonic-gate 				    FIELD_REVERSE_COMPARISONS;
473*0Sstevel@tonic-gate 				field_apply_all(S->m_fields_head,
474*0Sstevel@tonic-gate 				    FIELD_REVERSE_COMPARISONS);
475*0Sstevel@tonic-gate 				break;
476*0Sstevel@tonic-gate 
477*0Sstevel@tonic-gate 			case 't':
478*0Sstevel@tonic-gate 				/*
479*0Sstevel@tonic-gate 				 * delimiter
480*0Sstevel@tonic-gate 				 */
481*0Sstevel@tonic-gate 				if (S->m_single_byte_locale) {
482*0Sstevel@tonic-gate 					/*
483*0Sstevel@tonic-gate 					 * Most debuggers can't take tabs as
484*0Sstevel@tonic-gate 					 * input arguments, so we provide an
485*0Sstevel@tonic-gate 					 * escape sequence to allow testing of
486*0Sstevel@tonic-gate 					 * this special case for the DEBUG
487*0Sstevel@tonic-gate 					 * version.
488*0Sstevel@tonic-gate 					 */
489*0Sstevel@tonic-gate 					S->m_field_separator.sc =
490*0Sstevel@tonic-gate #ifdef DEBUG
491*0Sstevel@tonic-gate 					    xstreql(optarg, "\\t") ? '\t' :
492*0Sstevel@tonic-gate #endif
493*0Sstevel@tonic-gate 					    optarg[0];
494*0Sstevel@tonic-gate 				} else
495*0Sstevel@tonic-gate 					(void) mbtowc(&S->m_field_separator.wc,
496*0Sstevel@tonic-gate 					    optarg, MB_CUR_MAX);
497*0Sstevel@tonic-gate 				break;
498*0Sstevel@tonic-gate 
499*0Sstevel@tonic-gate 			case 'k':
500*0Sstevel@tonic-gate 				/*
501*0Sstevel@tonic-gate 				 * key
502*0Sstevel@tonic-gate 				 */
503*0Sstevel@tonic-gate 				(void) parse_new_field_spec(S, optarg);
504*0Sstevel@tonic-gate 				break;
505*0Sstevel@tonic-gate 
506*0Sstevel@tonic-gate 			case 'S':
507*0Sstevel@tonic-gate 				S->m_memory_limit = strtomem(optarg);
508*0Sstevel@tonic-gate #ifdef DEBUG
509*0Sstevel@tonic-gate 				(void) fprintf(stderr, CMDNAME
510*0Sstevel@tonic-gate 				    ": limiting size to %d bytes\n",
511*0Sstevel@tonic-gate 				    S->m_memory_limit);
512*0Sstevel@tonic-gate #endif /* DEBUG */
513*0Sstevel@tonic-gate 				break;
514*0Sstevel@tonic-gate 
515*0Sstevel@tonic-gate 			/*
516*0Sstevel@tonic-gate 			 * We never take a naked -999; these should always be
517*0Sstevel@tonic-gate 			 * associated with a preceding +000.
518*0Sstevel@tonic-gate 			 */
519*0Sstevel@tonic-gate 			case '0':
520*0Sstevel@tonic-gate 			case '1':
521*0Sstevel@tonic-gate 			case '2':
522*0Sstevel@tonic-gate 			case '3':
523*0Sstevel@tonic-gate 			case '4':
524*0Sstevel@tonic-gate 			case '5':
525*0Sstevel@tonic-gate 			case '6':
526*0Sstevel@tonic-gate 			case '7':
527*0Sstevel@tonic-gate 			case '8':
528*0Sstevel@tonic-gate 			case '9':
529*0Sstevel@tonic-gate 				usage();
530*0Sstevel@tonic-gate 				break;
531*0Sstevel@tonic-gate 			case '?':
532*0Sstevel@tonic-gate 				/* error case */
533*0Sstevel@tonic-gate 				usage();
534*0Sstevel@tonic-gate 				break;
535*0Sstevel@tonic-gate 			}
536*0Sstevel@tonic-gate 
537*0Sstevel@tonic-gate 			/*
538*0Sstevel@tonic-gate 			 * Go back for next argument.
539*0Sstevel@tonic-gate 			 */
540*0Sstevel@tonic-gate 			continue;
541*0Sstevel@tonic-gate 		}
542*0Sstevel@tonic-gate 
543*0Sstevel@tonic-gate 		/*
544*0Sstevel@tonic-gate 		 * There are three (interpretable) possibilities for getopt() to
545*0Sstevel@tonic-gate 		 * return EOF with arguments on the command line: we have seen
546*0Sstevel@tonic-gate 		 * the "end-of-options" token, --, we have encountered the
547*0Sstevel@tonic-gate 		 * old-style field definition, +NNN, or we have found a
548*0Sstevel@tonic-gate 		 * filename.
549*0Sstevel@tonic-gate 		 *
550*0Sstevel@tonic-gate 		 * In the second case, we must also search for the optional -NNN
551*0Sstevel@tonic-gate 		 * field terminal definition.  (since "+joe", for instance, is
552*0Sstevel@tonic-gate 		 * a valid filename, we must handle this pattern as well.)  This
553*0Sstevel@tonic-gate 		 * is performed by parse_old_field_spec().
554*0Sstevel@tonic-gate 		 */
555*0Sstevel@tonic-gate 		if (xstreql(argv[optind - 1], "--")) {
556*0Sstevel@tonic-gate 			/*
557*0Sstevel@tonic-gate 			 * Process all arguments following end-of-options token
558*0Sstevel@tonic-gate 			 * as filenames.
559*0Sstevel@tonic-gate 			 */
560*0Sstevel@tonic-gate 			while (optind < argc) {
561*0Sstevel@tonic-gate 				if (xstreql(argv[optind], "-"))
562*0Sstevel@tonic-gate 					S->m_input_from_stdin = 1;
563*0Sstevel@tonic-gate 				else
564*0Sstevel@tonic-gate 					stream_add_file_to_chain(
565*0Sstevel@tonic-gate 					    &(S->m_input_streams),
566*0Sstevel@tonic-gate 					    argv[optind]);
567*0Sstevel@tonic-gate 				optind++;
568*0Sstevel@tonic-gate 			}
569*0Sstevel@tonic-gate 
570*0Sstevel@tonic-gate 			break;
571*0Sstevel@tonic-gate 		}
572*0Sstevel@tonic-gate 
573*0Sstevel@tonic-gate 		if (optind < argc) {
574*0Sstevel@tonic-gate 			if (xstreql(argv[optind], "-")) {
575*0Sstevel@tonic-gate 				S->m_input_from_stdin = 1;
576*0Sstevel@tonic-gate 				optind++;
577*0Sstevel@tonic-gate 			} else if (*(argv[optind]) != '+' ||
578*0Sstevel@tonic-gate 			    !parse_old_field_spec(S, argc, argv)) {
579*0Sstevel@tonic-gate 				/*
580*0Sstevel@tonic-gate 				 * It's a filename, because it either doesn't
581*0Sstevel@tonic-gate 				 * start with '+', or if it did, it wasn't an
582*0Sstevel@tonic-gate 				 * actual field specifier.
583*0Sstevel@tonic-gate 				 */
584*0Sstevel@tonic-gate 				stream_add_file_to_chain(&(S->m_input_streams),
585*0Sstevel@tonic-gate 				    argv[optind]);
586*0Sstevel@tonic-gate 				optind++;
587*0Sstevel@tonic-gate 			}
588*0Sstevel@tonic-gate 		}
589*0Sstevel@tonic-gate 	}
590*0Sstevel@tonic-gate 
591*0Sstevel@tonic-gate 	if (S->m_input_streams == NULL)
592*0Sstevel@tonic-gate 		S->m_input_from_stdin = 1;
593*0Sstevel@tonic-gate 
594*0Sstevel@tonic-gate 	if (S->m_output_filename == NULL)
595*0Sstevel@tonic-gate 		S->m_output_to_stdout = 1;
596*0Sstevel@tonic-gate 
597*0Sstevel@tonic-gate 	/*
598*0Sstevel@tonic-gate 	 * If no fields, then one great field.  However, if the -b option was
599*0Sstevel@tonic-gate 	 * set globally, be sure to ignore it, as per UNIX98.
600*0Sstevel@tonic-gate 	 */
601*0Sstevel@tonic-gate 	if (S->m_fields_head == NULL) {
602*0Sstevel@tonic-gate 		S->m_field_options &= ~FIELD_IGNORE_BLANKS_START;
603*0Sstevel@tonic-gate 
604*0Sstevel@tonic-gate 		(void) parse_new_field_spec(S, "1");
605*0Sstevel@tonic-gate 		/*
606*0Sstevel@tonic-gate 		 * "Entire line" fast path is only valid if no delimiter has
607*0Sstevel@tonic-gate 		 * been set and no modifiers have been applied.
608*0Sstevel@tonic-gate 		 */
609*0Sstevel@tonic-gate 		if (S->m_field_separator.wc == 0 &&
610*0Sstevel@tonic-gate 		    S->m_default_species == ALPHA &&
611*0Sstevel@tonic-gate 		    S->m_field_options == 0)
612*0Sstevel@tonic-gate 			S->m_entire_line = 1;
613*0Sstevel@tonic-gate 	}
614*0Sstevel@tonic-gate 
615*0Sstevel@tonic-gate 	return (0);
616*0Sstevel@tonic-gate }
617