xref: /netbsd-src/usr.bin/sort/init.c (revision 404fbe5fb94ca1e054339640cabb2801ce52dd30)
1 /*	$NetBSD: init.c,v 1.18 2008/04/28 20:24:15 martin Exp $	*/
2 
3 /*-
4  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Ben Harris and Jaromir Dolecek.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * This code is derived from software contributed to Berkeley by
37  * Peter McIlroy.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  */
63 
64 #include "sort.h"
65 
66 #ifndef lint
67 __RCSID("$NetBSD: init.c,v 1.18 2008/04/28 20:24:15 martin Exp $");
68 __SCCSID("@(#)init.c	8.1 (Berkeley) 6/6/93");
69 #endif /* not lint */
70 
71 #include <ctype.h>
72 #include <string.h>
73 
74 static void insertcol __P((struct field *));
75 static const char *setcolumn __P((const char *, struct field *, int));
76 
77 u_char gweights[NBINS];
78 
79 /*
80  * masks of ignored characters.  Alltable is 256 ones.
81  */
82 static u_char alltable[NBINS], dtable[NBINS], itable[NBINS];
83 
84 /*
85  * parsed key options
86  */
87 struct coldesc *clist = NULL;
88 int ncols = 0;
89 
90 /*
91  * clist (list of columns which correspond to one or more icol or tcol)
92  * is in increasing order of columns.
93  * Fields are kept in increasing order of fields.
94  */
95 
96 /*
97  * keep clist in order--inserts a column in a sorted array
98  */
99 static void
100 insertcol(field)
101 	struct field *field;
102 {
103 	int i;
104 	struct coldesc *p;
105 
106 	/* Make space for new item */
107 	p = realloc(clist, (ncols + 2) * sizeof(*clist));
108 	if (!p)
109 		err(1, "realloc");
110 	clist = p;
111 	memset(&clist[ncols], 0, sizeof(clist[ncols]));
112 
113 	for (i = 0; i < ncols; i++)
114 		if (field->icol.num <= clist[i].num)
115 			break;
116 	if (field->icol.num != clist[i].num) {
117 		memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
118 		clist[i].num = field->icol.num;
119 		ncols++;
120 	}
121 	if (field->tcol.num && field->tcol.num != field->icol.num) {
122 		for (i = 0; i < ncols; i++)
123 			if (field->tcol.num <= clist[i].num)
124 				break;
125 		if (field->tcol.num != clist[i].num) {
126 			memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
127 			clist[i].num = field->tcol.num;
128 			ncols++;
129 		}
130 	}
131 }
132 
133 /*
134  * matches fields with the appropriate columns--n^2 but who cares?
135  */
136 void
137 fldreset(fldtab)
138 	struct field *fldtab;
139 {
140 	int i;
141 
142 	fldtab[0].tcol.p = clist + ncols - 1;
143 	for (++fldtab; fldtab->icol.num; ++fldtab) {
144 		for (i = 0; fldtab->icol.num != clist[i].num; i++)
145 			;
146 		fldtab->icol.p = clist + i;
147 		if (!fldtab->tcol.num)
148 			continue;
149 		for (i = 0; fldtab->tcol.num != clist[i].num; i++)
150 			;
151 		fldtab->tcol.p = clist + i;
152 	}
153 }
154 
155 /*
156  * interprets a column in a -k field
157  */
158 static const char *
159 setcolumn(pos, cur_fld, gflag)
160 	const char *pos;
161 	struct field *cur_fld;
162 	int gflag;
163 {
164 	struct column *col;
165 	char *npos;
166 	int tmp;
167 	col = cur_fld->icol.num ? (&cur_fld->tcol) : (&cur_fld->icol);
168 	col->num = (int) strtol(pos, &npos, 10);
169 	pos = npos;
170 	if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
171 		errx(2, "field numbers must be positive");
172 	if (*pos == '.') {
173 		if (!col->num)
174 			errx(2, "cannot indent end of line");
175 		++pos;
176 		col->indent = (int) strtol(pos, &npos, 10);
177 		pos = npos;
178 		if (&cur_fld->icol == col)
179 			col->indent--;
180 		if (col->indent < 0)
181 			errx(2, "illegal offset");
182 	}
183 	for(; (tmp = optval(*pos, cur_fld->tcol.num)); pos++)
184 		cur_fld->flags |= tmp;
185 	if (cur_fld->icol.num == 0)
186 		cur_fld->icol.num = 1;
187 	return (pos);
188 }
189 
190 int
191 setfield(pos, cur_fld, gflag)
192 	const char *pos;
193 	struct field *cur_fld;
194 	int gflag;
195 {
196 	int tmp;
197 
198 	cur_fld->weights = ascii;
199 	cur_fld->mask = alltable;
200 
201 	pos = setcolumn(pos, cur_fld, gflag);
202 	if (*pos == '\0')			/* key extends to EOL. */
203 		cur_fld->tcol.num = 0;
204 	else {
205 		if (*pos != ',')
206 			errx(2, "illegal field descriptor");
207 		setcolumn((++pos), cur_fld, gflag);
208 	}
209 	if (!cur_fld->flags)
210 		cur_fld->flags = gflag;
211 	tmp = cur_fld->flags;
212 
213 	/*
214 	 * Assign appropriate mask table and weight table.
215 	 * If the global weights are reversed, the local field
216 	 * must be "re-reversed".
217 	 */
218 	if (((tmp & R) ^ (gflag & R)) && (tmp & F))
219 		cur_fld->weights = RFtable;
220 	else if (tmp & F)
221 		cur_fld->weights = Ftable;
222 	else if ((tmp & R) ^ (gflag & R))
223 		cur_fld->weights = Rascii;
224 
225 	if (tmp & I)
226 		cur_fld->mask = itable;
227 	else if (tmp & D)
228 		cur_fld->mask = dtable;
229 
230 	cur_fld->flags |= (gflag & (BI | BT));
231 	if (!cur_fld->tcol.indent)	/* BT has no meaning at end of field */
232 		cur_fld->flags &= ~BT;
233 
234 	if (cur_fld->tcol.num && !(!(cur_fld->flags & BI)
235 	    && cur_fld->flags & BT) && (cur_fld->tcol.num <= cur_fld->icol.num
236 	    && cur_fld->tcol.indent != 0 /* == 0 -> end of field, i.e. okay */
237 	    && cur_fld->tcol.indent < cur_fld->icol.indent))
238 		errx(2, "fields out of order");
239 	insertcol(cur_fld);
240 	return (cur_fld->tcol.num);
241 }
242 
243 int
244 optval(desc, tcolflag)
245 	int desc, tcolflag;
246 {
247 	switch(desc) {
248 		case 'b':
249 			if (!tcolflag)
250 				return (BI);
251 			else
252 				return (BT);
253 		case 'd': return (D);
254 		case 'f': return (F);
255 		case 'i': return (I);
256 		case 'n': return (N);
257 		case 'r': return (R);
258 		default:  return (0);
259 	}
260 }
261 
262 /*
263  * Replace historic +SPEC arguments with appropriate -kSPEC.
264  */
265 void
266 fixit(argc, argv)
267 	int *argc;
268 	char **argv;
269 {
270 	int i, j, fplus=0;
271 	char *vpos, *tpos, spec[20];
272 	int col, indent;
273 	size_t sz;
274 
275 	for (i = 1; i < *argc; i++) {
276 		if (argv[i][0] != '+' && !fplus)
277 			continue;
278 
279 		if (fplus && (argv[i][0] != '-' || !isdigit((unsigned char)argv[i][1]))) {
280 			fplus = 0;
281 			if (argv[i][0] != '+') {
282 				/* not a -POS argument, skip */
283 				continue;
284 			}
285 		}
286 
287 		/* parse spec */
288 		tpos = argv[i]+1;
289 		col = (int)strtol(tpos, &tpos, 10);
290 		if (*tpos == '.') {
291 			++tpos;
292 			indent = (int) strtol(tpos, &tpos, 10);
293 		} else
294 			indent = 0;
295 		/* tpos points to optional flags now */
296 
297 		/*
298 		 * For x.y, the obsolescent variant assumed 0 == beginning
299 		 * of line, while the new form uses 0 == end of line.
300 		 * Convert accordingly.
301 		 */
302 		if (!fplus) {
303 			/* +POS */
304 			col += 1;
305 			indent += 1;
306 		} else {
307 			/* -POS */
308 			if (indent > 0)
309 				col += 1;
310 		}
311 
312 		/* new style spec */
313 		sz = snprintf(spec, sizeof(spec), "%d.%d%s", col, indent,
314 		    tpos);
315 
316 		if (!fplus) {
317 			/* Replace the +POS argument with new-style -kSPEC */
318 			asprintf(&vpos, "-k%s", spec);
319 			argv[i] = vpos;
320 			fplus = 1;
321 		} else {
322 			/*
323 			 * Append the spec to one previously generated from
324 			 * +POS argument, and remove the argv element.
325 			 */
326 			asprintf(&vpos, "%s,%s", argv[i-1], spec);
327 			free(argv[i-1]);
328 			argv[i-1] = vpos;
329 			for (j=i; j < *argc; j++)
330 				argv[j] = argv[j+1];
331 			*argc -= 1;
332 			i--;
333 		}
334 	}
335 }
336 
337 /*
338  * ascii, Rascii, Ftable, and RFtable map
339  * REC_D -> REC_D;  {not REC_D} -> {not REC_D}.
340  * gweights maps REC_D -> (0 or 255); {not REC_D} -> {not gweights[REC_D]}.
341  * Note: when sorting in forward order, to encode character zero in a key,
342  * use \001\001; character 1 becomes \001\002.  In this case, character 0
343  * is reserved for the field delimiter.  Analagously for -r (fld_d = 255).
344  * Note: this is only good for ASCII sorting.  For different LC 's,
345  * all bets are off.  See also num_init in number.c
346  */
347 void
348 settables(gflags)
349 	int gflags;
350 {
351 	u_char *wts;
352 	int i, incr;
353 	for (i=0; i < 256; i++) {
354 		ascii[i] = i;
355 		if (i > REC_D && i < 255 - REC_D+1)
356 			Rascii[i] = 255 - i + 1;
357 		else
358 			Rascii[i] = 255 - i;
359 		if (islower(i)) {
360 			Ftable[i] = Ftable[toupper(i)];
361 			RFtable[i] = RFtable[toupper(i)];
362 		} else if (REC_D>= 'A' && REC_D < 'Z' && i < 'a' && i > REC_D) {
363 			Ftable[i] = i + 1;
364 			RFtable[i] = Rascii[i] - 1;
365 		} else {
366 			Ftable[i] = i;
367 			RFtable[i] = Rascii[i];
368 		}
369 		alltable[i] = 1;
370 
371 		if (i == '\n' || isprint(i))
372 			itable[i] = 1;
373 		else
374 			itable[i] = 0;
375 
376 		if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
377 			dtable[i] = 1;
378 		else
379 			dtable[i] = 0;
380 	}
381 
382 	Rascii[REC_D] = RFtable[REC_D] = REC_D;
383 	if (isupper(REC_D))
384 		Ftable[tolower(REC_D)]++;
385 
386 	if ((gflags & R) && !((gflags & F) && SINGL_FLD))
387 		wts = Rascii;
388 	else if (!((gflags & F) && SINGL_FLD))
389 		wts = ascii;
390 	else if (gflags & R)
391 		wts = RFtable;
392 	else
393 		wts = Ftable;
394 
395 	memmove(gweights, wts, sizeof(gweights));
396 	incr = (gflags & R) ? -1 : 1;
397 	for (i = 0; i < REC_D; i++)
398 		gweights[i] += incr;
399 	gweights[REC_D] = ((gflags & R) ? 255 : 0);
400 	if (SINGL_FLD && (gflags & F)) {
401 		for (i = 0; i < REC_D; i++) {
402 			ascii[i] += incr;
403 			Rascii[i] += incr;
404 		}
405 		ascii[REC_D] = Rascii[REC_D] = gweights[REC_D];
406 	}
407 }
408