xref: /csrg-svn/contrib/sort/fields.c (revision 62247)
160903Sbostic /*-
2*62247Sbostic  * Copyright (c) 1993
3*62247Sbostic  *	The Regents of the University of California.  All rights reserved.
460903Sbostic  *
560903Sbostic  * This code is derived from software contributed to Berkeley by
660903Sbostic  * Peter McIlroy.
760903Sbostic  *
860903Sbostic  * %sccs.include.redist.c%
960903Sbostic  */
1060903Sbostic 
1160903Sbostic #ifndef lint
12*62247Sbostic static char sccsid[] = "@(#)fields.c	8.1 (Berkeley) 06/06/93";
1360903Sbostic #endif /* not lint */
1460903Sbostic 
1560903Sbostic /* Subroutines to generate sort keys. */
1660903Sbostic 
1760903Sbostic #include "sort.h"
1860903Sbostic 
1960903Sbostic #define blancmange(ptr) {					\
2060903Sbostic 	if (BLANK & d_mask[*(ptr)])				\
2160903Sbostic 		while (BLANK & d_mask[*(++(ptr))]);		\
2260903Sbostic }
2360903Sbostic 
2460903Sbostic #define NEXTCOL(pos) {						\
2560903Sbostic 	if (!SEP_FLAG)						\
2660903Sbostic 		while (BLANK & l_d_mask[*(++pos)]);		\
2760903Sbostic 	while (!((FLD_D | REC_D_F) & l_d_mask[*++pos]));	\
2860903Sbostic }
2960903Sbostic 
3060903Sbostic extern u_char *enterfield __P((u_char *, u_char *, struct field *, int));
3160903Sbostic 
3260903Sbostic extern u_char *number __P((u_char *, u_char *, u_char *, u_char *, int));
3360903Sbostic 
3460903Sbostic extern struct coldesc clist[(ND+1)*2];
3560903Sbostic extern int ncols;
3660903Sbostic 
3760903Sbostic #define DECIMAL '.'
3860903Sbostic #define OFFSET 128
3960903Sbostic 
4060903Sbostic u_char TENS[10];	/* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
4160903Sbostic u_char NEGTENS[10];	/* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
4260903Sbostic u_char *OFF_TENS, *OFF_NTENS;	/* TENS - '0', NEGTENS - '0' */
4360903Sbostic u_char fnum[NBINS], rnum[NBINS];
4460903Sbostic 
4560903Sbostic /*
4660903Sbostic  * constructs sort key with leading recheader, followed by the key,
4760903Sbostic  * followed by the original line.
4860903Sbostic  */
4960903Sbostic length_t
enterkey(keybuf,line,size,fieldtable)5060903Sbostic enterkey(keybuf, line, size, fieldtable)
5160903Sbostic 	struct recheader *keybuf;	/* pointer to start of key */
5260903Sbostic 	DBT *line;
5360903Sbostic 	int size;
5460903Sbostic 	struct field fieldtable[];
5560903Sbostic {
5660903Sbostic 	int i;
5760903Sbostic 	register u_char *l_d_mask;
5860903Sbostic 	register u_char *lineend, *pos;
5960903Sbostic 	u_char *endkey, *keypos;
6060903Sbostic 	register struct coldesc *clpos;
6160903Sbostic 	register int col = 1;
6260903Sbostic 	struct field *ftpos;
6360903Sbostic 	l_d_mask = d_mask;
6460903Sbostic 	pos = (u_char *) line->data - 1;
6560903Sbostic 	lineend = (u_char *) line->data + line->size-1;
6660903Sbostic 				/* don't include rec_delimiter */
6760903Sbostic 	keypos = keybuf->data;
6860903Sbostic 
6960903Sbostic 	for (i = 0; i < ncols; i++) {
7060903Sbostic 		clpos = clist + i;
7160903Sbostic 		for (; (col < clpos->num) && (pos < lineend); col++)
7260903Sbostic 			{ NEXTCOL(pos); }
7360903Sbostic 		if (pos >= lineend)
7460903Sbostic 			break;
7560903Sbostic 		clpos->start = SEP_FLAG ? pos + 1 : pos;
7660903Sbostic 		NEXTCOL(pos);
7760903Sbostic 		clpos->end = pos;
7860903Sbostic 		col++;
7960903Sbostic 		if (pos >= lineend) {
8060903Sbostic 			clpos->end = lineend;
8160903Sbostic 			++i;
8260903Sbostic 			break;
8360903Sbostic 		}
8460903Sbostic 	}
8560903Sbostic 	for (; i <= ncols; i++)
8660903Sbostic 		clist[i].start = clist[i].end = lineend;
8760903Sbostic 	if (clist[0].start < (u_char *) line->data)
8860903Sbostic 		++clist[0].start;
8960903Sbostic 	endkey = (u_char *) keybuf + size - line->size;
9060903Sbostic 	for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
9160903Sbostic 		if ((keypos = enterfield(keypos, endkey, ftpos,
9260903Sbostic 		    fieldtable->flags)) == NULL)
9360903Sbostic 			return (1);
9460903Sbostic 
9560903Sbostic 	if (UNIQUE)
9660903Sbostic 		*(keypos-1) = REC_D;
9760903Sbostic 	keybuf->offset = keypos - keybuf->data;
9860903Sbostic 	keybuf->length = keybuf->offset + line->size;
9960903Sbostic 	if (keybuf->length + sizeof(TRECHEADER) > size)
10060903Sbostic 		return (1);		/* line too long for buffer */
10160903Sbostic 	memcpy(keybuf->data + keybuf->offset, line->data, line->size);
10260903Sbostic 	return (0);
10360903Sbostic }
10460903Sbostic 
10560903Sbostic /*
10660903Sbostic  * constructs a field (as defined by -k) within a key
10760903Sbostic  */
10860903Sbostic u_char *
enterfield(tablepos,endkey,cur_fld,gflags)10960903Sbostic enterfield(tablepos, endkey, cur_fld, gflags)
11060903Sbostic 	struct field *cur_fld;
11160903Sbostic 	register u_char *tablepos, *endkey;
11260903Sbostic 	int gflags;
11360903Sbostic {
11460903Sbostic 	register u_char *start, *end, *lineend, *mask, *lweight;
11560903Sbostic 	struct column icol, tcol;
11660903Sbostic 	register u_int flags;
11760903Sbostic 	u_int Rflag;
11860903Sbostic 	icol = cur_fld->icol;
11960903Sbostic 	tcol = cur_fld->tcol;
12060903Sbostic 	flags = cur_fld->flags;
12160903Sbostic 	start = icol.p->start;
12260903Sbostic 	lineend = clist[ncols].end;
12360903Sbostic 	if (flags & BI)
12460903Sbostic 		blancmange(start);
12560903Sbostic 	start += icol.indent;
12660903Sbostic 	start = min(start, lineend);
12760903Sbostic 	if (!tcol.num)
12860903Sbostic 		end = lineend;
12960903Sbostic 	else {
13060903Sbostic 		if (tcol.indent) {
13160903Sbostic 			end = tcol.p->start;
13260903Sbostic 			if (flags & BT) blancmange(end);
13360903Sbostic 			end += tcol.indent;
13460903Sbostic 			end = min(end, lineend);
13560903Sbostic 		} else
13660903Sbostic 			end = tcol.p->end;
13760903Sbostic 	}
13860903Sbostic 	if (flags & N) {
13960903Sbostic 		Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
14060903Sbostic 		tablepos = number(tablepos, endkey, start, end, Rflag);
14160903Sbostic 		return (tablepos);
14260903Sbostic 	}
14360903Sbostic 	mask = alltable;
14460903Sbostic 	mask = cur_fld->mask;
14560903Sbostic 	lweight = cur_fld->weights;
14660903Sbostic 	for (; start < end; start++)
14760903Sbostic 		if (mask[*start]) {
14860903Sbostic 			if (*start <= 1) {
14960903Sbostic 				if (tablepos+2 >= endkey)
15060903Sbostic 					return (NULL);
15160903Sbostic 				*tablepos++ = lweight[1];
15260903Sbostic 				*tablepos++ = lweight[*start ? 2 : 1];
15360903Sbostic 			} else {
15460903Sbostic 				*tablepos++ = lweight[*start];
15560903Sbostic 				if (tablepos == endkey)
15660903Sbostic 				return (NULL);
15760903Sbostic 			}
15860903Sbostic 		}
15960903Sbostic 	*tablepos++ = lweight[0];
16060903Sbostic 	return (tablepos == endkey ? NULL : tablepos);
16160903Sbostic }
16260903Sbostic 
16360903Sbostic /* Uses the first bin to assign sign, expsign, 0, and the first
16460903Sbostic  * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
16560903Sbostic  *   When sorting in forward order:
16660903Sbostic  * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
16760903Sbostic  * else use (0-99)->(2-102).
16860903Sbostic  * If the exponent is >=61, use another byte for each additional 253
16960903Sbostic  * in the exponent. Cutoff is at 567.
17060903Sbostic  * To avoid confusing the exponent and the mantissa, use a field delimiter
17160903Sbostic  * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
17260903Sbostic  * only time a field delimiter can come in that position.
17360903Sbostic  * Reverse order is done analagously.
17460903Sbostic */
17560903Sbostic 
17660903Sbostic u_char *
number(pos,bufend,line,lineend,Rflag)17760903Sbostic number(pos, bufend, line, lineend, Rflag)
17860903Sbostic 	register u_char *line, *pos, *bufend, *lineend;
17960903Sbostic 	int Rflag;
18060903Sbostic {
18160903Sbostic 	register int or_sign, parity = 0;
18260903Sbostic 	register int expincr = 1, exponent = -1;
18360903Sbostic 	int bite, expsign = 1, sign = 1;
18460903Sbostic 	register u_char lastvalue, *nonzero, *tline, *C_TENS;
18560903Sbostic 	u_char *nweights;
18660903Sbostic 
18760903Sbostic 	if (Rflag)
18860903Sbostic 		nweights = rnum;
18960903Sbostic 	else
19060903Sbostic 		nweights = fnum;
19160903Sbostic 	if (pos > bufend - 8)
19260903Sbostic 		return (NULL);
19360903Sbostic 	/* or_sign sets the sort direction:
19460903Sbostic 	 *	(-r: +/-)(sign: +/-)(expsign: +/-) */
19560903Sbostic 	or_sign = sign ^ expsign ^ Rflag;
19660903Sbostic 	blancmange(line);
19760903Sbostic 	if (*line == '-') {	/* set the sign */
19860903Sbostic 		or_sign ^= 1;
19960903Sbostic 		sign = 0;
20060903Sbostic 		line++;
20160903Sbostic 	}
20260903Sbostic 	/* eat initial zeroes */
20360903Sbostic 	for (; *line == '0' && line < lineend; line++);
20460903Sbostic 	/* calculate exponents < 0 */
20560903Sbostic 	if (*line == DECIMAL) {
20660903Sbostic 		exponent = 1;
20760903Sbostic 		while (*++line == '0' && line < lineend)
20860903Sbostic 			exponent++;
20960903Sbostic 		expincr = 0;
21060903Sbostic 		expsign = 0;
21160903Sbostic 	}
21260903Sbostic 	/* next character better be a digit */
21360903Sbostic 	if (*line < '1' || *line > '9' || line >= lineend) {
21460903Sbostic 		*pos++ = nweights[127];
21560903Sbostic 		return (pos);
21660903Sbostic 	}
21760903Sbostic 	if (expincr) {
21860903Sbostic 		for (tline = line-1; *++tline >= '0' &&
21960903Sbostic 		    *tline <= '9' && tline < lineend;)
22060903Sbostic 			exponent++;
22160903Sbostic 	}
22260903Sbostic 	if (exponent > 567) {
22360903Sbostic 		*pos++ = nweights[sign ? (expsign ? 254 : 128)
22460903Sbostic 					: (expsign ? 0 : 126)];
22560903Sbostic 		warnx("exponent out of bounds");
22660903Sbostic 		return (pos);
22760903Sbostic 	}
22860903Sbostic 	bite = min(exponent, 61);
22960903Sbostic 	*pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
23060903Sbostic 				: (expsign ? 64-bite : 64+bite)];
23160903Sbostic 	if (bite >= 61) {
23260903Sbostic 		do {
23360903Sbostic 			exponent -= bite;
23460903Sbostic 			bite = min(exponent, 254);
23560903Sbostic 			*pos++ = nweights[or_sign ? 254-bite : bite];
23660903Sbostic 		} while (bite == 254);
23760903Sbostic 	}
23860903Sbostic 	C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
23960903Sbostic 	for (; line < lineend; line++) {
24060903Sbostic 		if (*line >= '0' && *line <= '9') {
24160903Sbostic 			if (parity) {
24260903Sbostic 				*pos++ = C_TENS[lastvalue] + (or_sign ? - *line
24360903Sbostic 						: *line);
24460903Sbostic 				if (pos == bufend)
24560903Sbostic 					return (NULL);
24660903Sbostic 				if (*line != '0' || lastvalue != '0')
24760903Sbostic 					nonzero = pos;
24860903Sbostic 			} else
24960903Sbostic 				lastvalue = *line;
25060903Sbostic 			parity ^= 1;
25160903Sbostic 		} else if(*line == DECIMAL) {
25260903Sbostic 			if(!expincr)	/* a decimal already occurred once */
25360903Sbostic 				break;
25460903Sbostic 			expincr = 0;
25560903Sbostic 		} else
25660903Sbostic 			break;
25760903Sbostic 	}
25860903Sbostic 	if (parity && lastvalue != '0') {
25960903Sbostic 		*pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
26060903Sbostic 					OFF_TENS[lastvalue] + '0';
26160903Sbostic 	} else
26260903Sbostic 		pos = nonzero;
26360903Sbostic 	if (pos > bufend-1)
26460903Sbostic 		return (NULL);
26560903Sbostic 	*pos++ = or_sign ? nweights[254] : nweights[0];
26660903Sbostic 	return (pos);
26760903Sbostic }
26860903Sbostic 
26960903Sbostic /* This forces a gap around the record delimiter
27060903Sbostic  * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
27160903Sbostic  * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
27260903Sbostic */
27360903Sbostic void
num_init()27460903Sbostic num_init()
27560903Sbostic {
27660903Sbostic 	int i;
27760903Sbostic 	TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
27860903Sbostic 	NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
27960903Sbostic 	OFF_TENS = TENS - '0';
28060903Sbostic 	OFF_NTENS = NEGTENS - '0';
28160903Sbostic 	for (i = 1; i < 10; i++) {
28260903Sbostic 		TENS[i] = TENS[i-1] + 10;
28360903Sbostic 		NEGTENS[i] = NEGTENS[i-1] - 10;
28460903Sbostic 	}
28560903Sbostic 	for (i = 0; i < REC_D; i++) {
28660903Sbostic 		fnum[i] = i;
28760903Sbostic 		rnum[255-i] = i;
28860903Sbostic 	}
28960903Sbostic 	for (i = REC_D; i <255; i++) {
29060903Sbostic 		fnum[i] = i+1;
29160903Sbostic 		rnum[255-i] = i-1;
29260903Sbostic 	}
29360903Sbostic }
294