xref: /csrg-svn/contrib/sort/fields.c (revision 60903)
1*60903Sbostic /*-
2*60903Sbostic  * Copyright (c) 1993 The Regents of the University of California.
3*60903Sbostic  * All rights reserved.
4*60903Sbostic  *
5*60903Sbostic  * This code is derived from software contributed to Berkeley by
6*60903Sbostic  * Peter McIlroy.
7*60903Sbostic  *
8*60903Sbostic  * %sccs.include.redist.c%
9*60903Sbostic  */
10*60903Sbostic 
11*60903Sbostic #ifndef lint
12*60903Sbostic static char sccsid[] = "@(#)fields.c	5.1 (Berkeley) 06/01/93";
13*60903Sbostic #endif /* not lint */
14*60903Sbostic 
15*60903Sbostic /* Subroutines to generate sort keys. */
16*60903Sbostic 
17*60903Sbostic #include "sort.h"
18*60903Sbostic 
19*60903Sbostic #define blancmange(ptr) {					\
20*60903Sbostic 	if (BLANK & d_mask[*(ptr)])				\
21*60903Sbostic 		while (BLANK & d_mask[*(++(ptr))]);		\
22*60903Sbostic }
23*60903Sbostic 
24*60903Sbostic #define NEXTCOL(pos) {						\
25*60903Sbostic 	if (!SEP_FLAG)						\
26*60903Sbostic 		while (BLANK & l_d_mask[*(++pos)]);		\
27*60903Sbostic 	while (!((FLD_D | REC_D_F) & l_d_mask[*++pos]));	\
28*60903Sbostic }
29*60903Sbostic 
30*60903Sbostic extern u_char *enterfield __P((u_char *, u_char *, struct field *, int));
31*60903Sbostic 
32*60903Sbostic extern u_char *number __P((u_char *, u_char *, u_char *, u_char *, int));
33*60903Sbostic 
34*60903Sbostic extern struct coldesc clist[(ND+1)*2];
35*60903Sbostic extern int ncols;
36*60903Sbostic 
37*60903Sbostic #define DECIMAL '.'
38*60903Sbostic #define OFFSET 128
39*60903Sbostic 
40*60903Sbostic u_char TENS[10];	/* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
41*60903Sbostic u_char NEGTENS[10];	/* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
42*60903Sbostic u_char *OFF_TENS, *OFF_NTENS;	/* TENS - '0', NEGTENS - '0' */
43*60903Sbostic u_char fnum[NBINS], rnum[NBINS];
44*60903Sbostic 
45*60903Sbostic /*
46*60903Sbostic  * constructs sort key with leading recheader, followed by the key,
47*60903Sbostic  * followed by the original line.
48*60903Sbostic  */
49*60903Sbostic length_t
50*60903Sbostic enterkey(keybuf, line, size, fieldtable)
51*60903Sbostic 	struct recheader *keybuf;	/* pointer to start of key */
52*60903Sbostic 	DBT *line;
53*60903Sbostic 	int size;
54*60903Sbostic 	struct field fieldtable[];
55*60903Sbostic {
56*60903Sbostic 	int i;
57*60903Sbostic 	register u_char *l_d_mask;
58*60903Sbostic 	register u_char *lineend, *pos;
59*60903Sbostic 	u_char *endkey, *keypos;
60*60903Sbostic 	register struct coldesc *clpos;
61*60903Sbostic 	register int col = 1;
62*60903Sbostic 	struct field *ftpos;
63*60903Sbostic 	l_d_mask = d_mask;
64*60903Sbostic 	pos = (u_char *) line->data - 1;
65*60903Sbostic 	lineend = (u_char *) line->data + line->size-1;
66*60903Sbostic 				/* don't include rec_delimiter */
67*60903Sbostic 	keypos = keybuf->data;
68*60903Sbostic 
69*60903Sbostic 	for (i = 0; i < ncols; i++) {
70*60903Sbostic 		clpos = clist + i;
71*60903Sbostic 		for (; (col < clpos->num) && (pos < lineend); col++)
72*60903Sbostic 			{ NEXTCOL(pos); }
73*60903Sbostic 		if (pos >= lineend)
74*60903Sbostic 			break;
75*60903Sbostic 		clpos->start = SEP_FLAG ? pos + 1 : pos;
76*60903Sbostic 		NEXTCOL(pos);
77*60903Sbostic 		clpos->end = pos;
78*60903Sbostic 		col++;
79*60903Sbostic 		if (pos >= lineend) {
80*60903Sbostic 			clpos->end = lineend;
81*60903Sbostic 			++i;
82*60903Sbostic 			break;
83*60903Sbostic 		}
84*60903Sbostic 	}
85*60903Sbostic 	for (; i <= ncols; i++)
86*60903Sbostic 		clist[i].start = clist[i].end = lineend;
87*60903Sbostic 	if (clist[0].start < (u_char *) line->data)
88*60903Sbostic 		++clist[0].start;
89*60903Sbostic 	endkey = (u_char *) keybuf + size - line->size;
90*60903Sbostic 	for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
91*60903Sbostic 		if ((keypos = enterfield(keypos, endkey, ftpos,
92*60903Sbostic 		    fieldtable->flags)) == NULL)
93*60903Sbostic 			return (1);
94*60903Sbostic 
95*60903Sbostic 	if (UNIQUE)
96*60903Sbostic 		*(keypos-1) = REC_D;
97*60903Sbostic 	keybuf->offset = keypos - keybuf->data;
98*60903Sbostic 	keybuf->length = keybuf->offset + line->size;
99*60903Sbostic 	if (keybuf->length + sizeof(TRECHEADER) > size)
100*60903Sbostic 		return (1);		/* line too long for buffer */
101*60903Sbostic 	memcpy(keybuf->data + keybuf->offset, line->data, line->size);
102*60903Sbostic 	return (0);
103*60903Sbostic }
104*60903Sbostic 
105*60903Sbostic /*
106*60903Sbostic  * constructs a field (as defined by -k) within a key
107*60903Sbostic  */
108*60903Sbostic u_char *
109*60903Sbostic enterfield(tablepos, endkey, cur_fld, gflags)
110*60903Sbostic 	struct field *cur_fld;
111*60903Sbostic 	register u_char *tablepos, *endkey;
112*60903Sbostic 	int gflags;
113*60903Sbostic {
114*60903Sbostic 	register u_char *start, *end, *lineend, *mask, *lweight;
115*60903Sbostic 	struct column icol, tcol;
116*60903Sbostic 	register u_int flags;
117*60903Sbostic 	u_int Rflag;
118*60903Sbostic 	icol = cur_fld->icol;
119*60903Sbostic 	tcol = cur_fld->tcol;
120*60903Sbostic 	flags = cur_fld->flags;
121*60903Sbostic 	start = icol.p->start;
122*60903Sbostic 	lineend = clist[ncols].end;
123*60903Sbostic 	if (flags & BI)
124*60903Sbostic 		blancmange(start);
125*60903Sbostic 	start += icol.indent;
126*60903Sbostic 	start = min(start, lineend);
127*60903Sbostic 	if (!tcol.num)
128*60903Sbostic 		end = lineend;
129*60903Sbostic 	else {
130*60903Sbostic 		if (tcol.indent) {
131*60903Sbostic 			end = tcol.p->start;
132*60903Sbostic 			if (flags & BT) blancmange(end);
133*60903Sbostic 			end += tcol.indent;
134*60903Sbostic 			end = min(end, lineend);
135*60903Sbostic 		} else
136*60903Sbostic 			end = tcol.p->end;
137*60903Sbostic 	}
138*60903Sbostic 	if (flags & N) {
139*60903Sbostic 		Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
140*60903Sbostic 		tablepos = number(tablepos, endkey, start, end, Rflag);
141*60903Sbostic 		return (tablepos);
142*60903Sbostic 	}
143*60903Sbostic 	mask = alltable;
144*60903Sbostic 	mask = cur_fld->mask;
145*60903Sbostic 	lweight = cur_fld->weights;
146*60903Sbostic 	for (; start < end; start++)
147*60903Sbostic 		if (mask[*start]) {
148*60903Sbostic 			if (*start <= 1) {
149*60903Sbostic 				if (tablepos+2 >= endkey)
150*60903Sbostic 					return (NULL);
151*60903Sbostic 				*tablepos++ = lweight[1];
152*60903Sbostic 				*tablepos++ = lweight[*start ? 2 : 1];
153*60903Sbostic 			} else {
154*60903Sbostic 				*tablepos++ = lweight[*start];
155*60903Sbostic 				if (tablepos == endkey)
156*60903Sbostic 				return (NULL);
157*60903Sbostic 			}
158*60903Sbostic 		}
159*60903Sbostic 	*tablepos++ = lweight[0];
160*60903Sbostic 	return (tablepos == endkey ? NULL : tablepos);
161*60903Sbostic }
162*60903Sbostic 
163*60903Sbostic /* Uses the first bin to assign sign, expsign, 0, and the first
164*60903Sbostic  * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
165*60903Sbostic  *   When sorting in forward order:
166*60903Sbostic  * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
167*60903Sbostic  * else use (0-99)->(2-102).
168*60903Sbostic  * If the exponent is >=61, use another byte for each additional 253
169*60903Sbostic  * in the exponent. Cutoff is at 567.
170*60903Sbostic  * To avoid confusing the exponent and the mantissa, use a field delimiter
171*60903Sbostic  * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
172*60903Sbostic  * only time a field delimiter can come in that position.
173*60903Sbostic  * Reverse order is done analagously.
174*60903Sbostic */
175*60903Sbostic 
176*60903Sbostic u_char *
177*60903Sbostic number(pos, bufend, line, lineend, Rflag)
178*60903Sbostic 	register u_char *line, *pos, *bufend, *lineend;
179*60903Sbostic 	int Rflag;
180*60903Sbostic {
181*60903Sbostic 	register int or_sign, parity = 0;
182*60903Sbostic 	register int expincr = 1, exponent = -1;
183*60903Sbostic 	int bite, expsign = 1, sign = 1;
184*60903Sbostic 	register u_char lastvalue, *nonzero, *tline, *C_TENS;
185*60903Sbostic 	u_char *nweights;
186*60903Sbostic 
187*60903Sbostic 	if (Rflag)
188*60903Sbostic 		nweights = rnum;
189*60903Sbostic 	else
190*60903Sbostic 		nweights = fnum;
191*60903Sbostic 	if (pos > bufend - 8)
192*60903Sbostic 		return (NULL);
193*60903Sbostic 	/* or_sign sets the sort direction:
194*60903Sbostic 	 *	(-r: +/-)(sign: +/-)(expsign: +/-) */
195*60903Sbostic 	or_sign = sign ^ expsign ^ Rflag;
196*60903Sbostic 	blancmange(line);
197*60903Sbostic 	if (*line == '-') {	/* set the sign */
198*60903Sbostic 		or_sign ^= 1;
199*60903Sbostic 		sign = 0;
200*60903Sbostic 		line++;
201*60903Sbostic 	}
202*60903Sbostic 	/* eat initial zeroes */
203*60903Sbostic 	for (; *line == '0' && line < lineend; line++);
204*60903Sbostic 	/* calculate exponents < 0 */
205*60903Sbostic 	if (*line == DECIMAL) {
206*60903Sbostic 		exponent = 1;
207*60903Sbostic 		while (*++line == '0' && line < lineend)
208*60903Sbostic 			exponent++;
209*60903Sbostic 		expincr = 0;
210*60903Sbostic 		expsign = 0;
211*60903Sbostic 	}
212*60903Sbostic 	/* next character better be a digit */
213*60903Sbostic 	if (*line < '1' || *line > '9' || line >= lineend) {
214*60903Sbostic 		*pos++ = nweights[127];
215*60903Sbostic 		return (pos);
216*60903Sbostic 	}
217*60903Sbostic 	if (expincr) {
218*60903Sbostic 		for (tline = line-1; *++tline >= '0' &&
219*60903Sbostic 		    *tline <= '9' && tline < lineend;)
220*60903Sbostic 			exponent++;
221*60903Sbostic 	}
222*60903Sbostic 	if (exponent > 567) {
223*60903Sbostic 		*pos++ = nweights[sign ? (expsign ? 254 : 128)
224*60903Sbostic 					: (expsign ? 0 : 126)];
225*60903Sbostic 		warnx("exponent out of bounds");
226*60903Sbostic 		return (pos);
227*60903Sbostic 	}
228*60903Sbostic 	bite = min(exponent, 61);
229*60903Sbostic 	*pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
230*60903Sbostic 				: (expsign ? 64-bite : 64+bite)];
231*60903Sbostic 	if (bite >= 61) {
232*60903Sbostic 		do {
233*60903Sbostic 			exponent -= bite;
234*60903Sbostic 			bite = min(exponent, 254);
235*60903Sbostic 			*pos++ = nweights[or_sign ? 254-bite : bite];
236*60903Sbostic 		} while (bite == 254);
237*60903Sbostic 	}
238*60903Sbostic 	C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
239*60903Sbostic 	for (; line < lineend; line++) {
240*60903Sbostic 		if (*line >= '0' && *line <= '9') {
241*60903Sbostic 			if (parity) {
242*60903Sbostic 				*pos++ = C_TENS[lastvalue] + (or_sign ? - *line
243*60903Sbostic 						: *line);
244*60903Sbostic 				if (pos == bufend)
245*60903Sbostic 					return (NULL);
246*60903Sbostic 				if (*line != '0' || lastvalue != '0')
247*60903Sbostic 					nonzero = pos;
248*60903Sbostic 			} else
249*60903Sbostic 				lastvalue = *line;
250*60903Sbostic 			parity ^= 1;
251*60903Sbostic 		} else if(*line == DECIMAL) {
252*60903Sbostic 			if(!expincr)	/* a decimal already occurred once */
253*60903Sbostic 				break;
254*60903Sbostic 			expincr = 0;
255*60903Sbostic 		} else
256*60903Sbostic 			break;
257*60903Sbostic 	}
258*60903Sbostic 	if (parity && lastvalue != '0') {
259*60903Sbostic 		*pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
260*60903Sbostic 					OFF_TENS[lastvalue] + '0';
261*60903Sbostic 	} else
262*60903Sbostic 		pos = nonzero;
263*60903Sbostic 	if (pos > bufend-1)
264*60903Sbostic 		return (NULL);
265*60903Sbostic 	*pos++ = or_sign ? nweights[254] : nweights[0];
266*60903Sbostic 	return (pos);
267*60903Sbostic }
268*60903Sbostic 
269*60903Sbostic /* This forces a gap around the record delimiter
270*60903Sbostic  * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
271*60903Sbostic  * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
272*60903Sbostic */
273*60903Sbostic void
274*60903Sbostic num_init()
275*60903Sbostic {
276*60903Sbostic 	int i;
277*60903Sbostic 	TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
278*60903Sbostic 	NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
279*60903Sbostic 	OFF_TENS = TENS - '0';
280*60903Sbostic 	OFF_NTENS = NEGTENS - '0';
281*60903Sbostic 	for (i = 1; i < 10; i++) {
282*60903Sbostic 		TENS[i] = TENS[i-1] + 10;
283*60903Sbostic 		NEGTENS[i] = NEGTENS[i-1] - 10;
284*60903Sbostic 	}
285*60903Sbostic 	for (i = 0; i < REC_D; i++) {
286*60903Sbostic 		fnum[i] = i;
287*60903Sbostic 		rnum[255-i] = i;
288*60903Sbostic 	}
289*60903Sbostic 	for (i = REC_D; i <255; i++) {
290*60903Sbostic 		fnum[i] = i+1;
291*60903Sbostic 		rnum[255-i] = i-1;
292*60903Sbostic 	}
293*60903Sbostic }
294