1*60903Sbostic /*- 2*60903Sbostic * Copyright (c) 1993 The Regents of the University of California. 3*60903Sbostic * All rights reserved. 4*60903Sbostic * 5*60903Sbostic * This code is derived from software contributed to Berkeley by 6*60903Sbostic * Peter McIlroy. 7*60903Sbostic * 8*60903Sbostic * %sccs.include.redist.c% 9*60903Sbostic */ 10*60903Sbostic 11*60903Sbostic #ifndef lint 12*60903Sbostic static char sccsid[] = "@(#)fields.c 5.1 (Berkeley) 06/01/93"; 13*60903Sbostic #endif /* not lint */ 14*60903Sbostic 15*60903Sbostic /* Subroutines to generate sort keys. */ 16*60903Sbostic 17*60903Sbostic #include "sort.h" 18*60903Sbostic 19*60903Sbostic #define blancmange(ptr) { \ 20*60903Sbostic if (BLANK & d_mask[*(ptr)]) \ 21*60903Sbostic while (BLANK & d_mask[*(++(ptr))]); \ 22*60903Sbostic } 23*60903Sbostic 24*60903Sbostic #define NEXTCOL(pos) { \ 25*60903Sbostic if (!SEP_FLAG) \ 26*60903Sbostic while (BLANK & l_d_mask[*(++pos)]); \ 27*60903Sbostic while (!((FLD_D | REC_D_F) & l_d_mask[*++pos])); \ 28*60903Sbostic } 29*60903Sbostic 30*60903Sbostic extern u_char *enterfield __P((u_char *, u_char *, struct field *, int)); 31*60903Sbostic 32*60903Sbostic extern u_char *number __P((u_char *, u_char *, u_char *, u_char *, int)); 33*60903Sbostic 34*60903Sbostic extern struct coldesc clist[(ND+1)*2]; 35*60903Sbostic extern int ncols; 36*60903Sbostic 37*60903Sbostic #define DECIMAL '.' 38*60903Sbostic #define OFFSET 128 39*60903Sbostic 40*60903Sbostic u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */ 41*60903Sbostic u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */ 42*60903Sbostic u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */ 43*60903Sbostic u_char fnum[NBINS], rnum[NBINS]; 44*60903Sbostic 45*60903Sbostic /* 46*60903Sbostic * constructs sort key with leading recheader, followed by the key, 47*60903Sbostic * followed by the original line. 48*60903Sbostic */ 49*60903Sbostic length_t 50*60903Sbostic enterkey(keybuf, line, size, fieldtable) 51*60903Sbostic struct recheader *keybuf; /* pointer to start of key */ 52*60903Sbostic DBT *line; 53*60903Sbostic int size; 54*60903Sbostic struct field fieldtable[]; 55*60903Sbostic { 56*60903Sbostic int i; 57*60903Sbostic register u_char *l_d_mask; 58*60903Sbostic register u_char *lineend, *pos; 59*60903Sbostic u_char *endkey, *keypos; 60*60903Sbostic register struct coldesc *clpos; 61*60903Sbostic register int col = 1; 62*60903Sbostic struct field *ftpos; 63*60903Sbostic l_d_mask = d_mask; 64*60903Sbostic pos = (u_char *) line->data - 1; 65*60903Sbostic lineend = (u_char *) line->data + line->size-1; 66*60903Sbostic /* don't include rec_delimiter */ 67*60903Sbostic keypos = keybuf->data; 68*60903Sbostic 69*60903Sbostic for (i = 0; i < ncols; i++) { 70*60903Sbostic clpos = clist + i; 71*60903Sbostic for (; (col < clpos->num) && (pos < lineend); col++) 72*60903Sbostic { NEXTCOL(pos); } 73*60903Sbostic if (pos >= lineend) 74*60903Sbostic break; 75*60903Sbostic clpos->start = SEP_FLAG ? pos + 1 : pos; 76*60903Sbostic NEXTCOL(pos); 77*60903Sbostic clpos->end = pos; 78*60903Sbostic col++; 79*60903Sbostic if (pos >= lineend) { 80*60903Sbostic clpos->end = lineend; 81*60903Sbostic ++i; 82*60903Sbostic break; 83*60903Sbostic } 84*60903Sbostic } 85*60903Sbostic for (; i <= ncols; i++) 86*60903Sbostic clist[i].start = clist[i].end = lineend; 87*60903Sbostic if (clist[0].start < (u_char *) line->data) 88*60903Sbostic ++clist[0].start; 89*60903Sbostic endkey = (u_char *) keybuf + size - line->size; 90*60903Sbostic for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++) 91*60903Sbostic if ((keypos = enterfield(keypos, endkey, ftpos, 92*60903Sbostic fieldtable->flags)) == NULL) 93*60903Sbostic return (1); 94*60903Sbostic 95*60903Sbostic if (UNIQUE) 96*60903Sbostic *(keypos-1) = REC_D; 97*60903Sbostic keybuf->offset = keypos - keybuf->data; 98*60903Sbostic keybuf->length = keybuf->offset + line->size; 99*60903Sbostic if (keybuf->length + sizeof(TRECHEADER) > size) 100*60903Sbostic return (1); /* line too long for buffer */ 101*60903Sbostic memcpy(keybuf->data + keybuf->offset, line->data, line->size); 102*60903Sbostic return (0); 103*60903Sbostic } 104*60903Sbostic 105*60903Sbostic /* 106*60903Sbostic * constructs a field (as defined by -k) within a key 107*60903Sbostic */ 108*60903Sbostic u_char * 109*60903Sbostic enterfield(tablepos, endkey, cur_fld, gflags) 110*60903Sbostic struct field *cur_fld; 111*60903Sbostic register u_char *tablepos, *endkey; 112*60903Sbostic int gflags; 113*60903Sbostic { 114*60903Sbostic register u_char *start, *end, *lineend, *mask, *lweight; 115*60903Sbostic struct column icol, tcol; 116*60903Sbostic register u_int flags; 117*60903Sbostic u_int Rflag; 118*60903Sbostic icol = cur_fld->icol; 119*60903Sbostic tcol = cur_fld->tcol; 120*60903Sbostic flags = cur_fld->flags; 121*60903Sbostic start = icol.p->start; 122*60903Sbostic lineend = clist[ncols].end; 123*60903Sbostic if (flags & BI) 124*60903Sbostic blancmange(start); 125*60903Sbostic start += icol.indent; 126*60903Sbostic start = min(start, lineend); 127*60903Sbostic if (!tcol.num) 128*60903Sbostic end = lineend; 129*60903Sbostic else { 130*60903Sbostic if (tcol.indent) { 131*60903Sbostic end = tcol.p->start; 132*60903Sbostic if (flags & BT) blancmange(end); 133*60903Sbostic end += tcol.indent; 134*60903Sbostic end = min(end, lineend); 135*60903Sbostic } else 136*60903Sbostic end = tcol.p->end; 137*60903Sbostic } 138*60903Sbostic if (flags & N) { 139*60903Sbostic Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0; 140*60903Sbostic tablepos = number(tablepos, endkey, start, end, Rflag); 141*60903Sbostic return (tablepos); 142*60903Sbostic } 143*60903Sbostic mask = alltable; 144*60903Sbostic mask = cur_fld->mask; 145*60903Sbostic lweight = cur_fld->weights; 146*60903Sbostic for (; start < end; start++) 147*60903Sbostic if (mask[*start]) { 148*60903Sbostic if (*start <= 1) { 149*60903Sbostic if (tablepos+2 >= endkey) 150*60903Sbostic return (NULL); 151*60903Sbostic *tablepos++ = lweight[1]; 152*60903Sbostic *tablepos++ = lweight[*start ? 2 : 1]; 153*60903Sbostic } else { 154*60903Sbostic *tablepos++ = lweight[*start]; 155*60903Sbostic if (tablepos == endkey) 156*60903Sbostic return (NULL); 157*60903Sbostic } 158*60903Sbostic } 159*60903Sbostic *tablepos++ = lweight[0]; 160*60903Sbostic return (tablepos == endkey ? NULL : tablepos); 161*60903Sbostic } 162*60903Sbostic 163*60903Sbostic /* Uses the first bin to assign sign, expsign, 0, and the first 164*60903Sbostic * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ). 165*60903Sbostic * When sorting in forward order: 166*60903Sbostic * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128; 167*60903Sbostic * else use (0-99)->(2-102). 168*60903Sbostic * If the exponent is >=61, use another byte for each additional 253 169*60903Sbostic * in the exponent. Cutoff is at 567. 170*60903Sbostic * To avoid confusing the exponent and the mantissa, use a field delimiter 171*60903Sbostic * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the 172*60903Sbostic * only time a field delimiter can come in that position. 173*60903Sbostic * Reverse order is done analagously. 174*60903Sbostic */ 175*60903Sbostic 176*60903Sbostic u_char * 177*60903Sbostic number(pos, bufend, line, lineend, Rflag) 178*60903Sbostic register u_char *line, *pos, *bufend, *lineend; 179*60903Sbostic int Rflag; 180*60903Sbostic { 181*60903Sbostic register int or_sign, parity = 0; 182*60903Sbostic register int expincr = 1, exponent = -1; 183*60903Sbostic int bite, expsign = 1, sign = 1; 184*60903Sbostic register u_char lastvalue, *nonzero, *tline, *C_TENS; 185*60903Sbostic u_char *nweights; 186*60903Sbostic 187*60903Sbostic if (Rflag) 188*60903Sbostic nweights = rnum; 189*60903Sbostic else 190*60903Sbostic nweights = fnum; 191*60903Sbostic if (pos > bufend - 8) 192*60903Sbostic return (NULL); 193*60903Sbostic /* or_sign sets the sort direction: 194*60903Sbostic * (-r: +/-)(sign: +/-)(expsign: +/-) */ 195*60903Sbostic or_sign = sign ^ expsign ^ Rflag; 196*60903Sbostic blancmange(line); 197*60903Sbostic if (*line == '-') { /* set the sign */ 198*60903Sbostic or_sign ^= 1; 199*60903Sbostic sign = 0; 200*60903Sbostic line++; 201*60903Sbostic } 202*60903Sbostic /* eat initial zeroes */ 203*60903Sbostic for (; *line == '0' && line < lineend; line++); 204*60903Sbostic /* calculate exponents < 0 */ 205*60903Sbostic if (*line == DECIMAL) { 206*60903Sbostic exponent = 1; 207*60903Sbostic while (*++line == '0' && line < lineend) 208*60903Sbostic exponent++; 209*60903Sbostic expincr = 0; 210*60903Sbostic expsign = 0; 211*60903Sbostic } 212*60903Sbostic /* next character better be a digit */ 213*60903Sbostic if (*line < '1' || *line > '9' || line >= lineend) { 214*60903Sbostic *pos++ = nweights[127]; 215*60903Sbostic return (pos); 216*60903Sbostic } 217*60903Sbostic if (expincr) { 218*60903Sbostic for (tline = line-1; *++tline >= '0' && 219*60903Sbostic *tline <= '9' && tline < lineend;) 220*60903Sbostic exponent++; 221*60903Sbostic } 222*60903Sbostic if (exponent > 567) { 223*60903Sbostic *pos++ = nweights[sign ? (expsign ? 254 : 128) 224*60903Sbostic : (expsign ? 0 : 126)]; 225*60903Sbostic warnx("exponent out of bounds"); 226*60903Sbostic return (pos); 227*60903Sbostic } 228*60903Sbostic bite = min(exponent, 61); 229*60903Sbostic *pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite) 230*60903Sbostic : (expsign ? 64-bite : 64+bite)]; 231*60903Sbostic if (bite >= 61) { 232*60903Sbostic do { 233*60903Sbostic exponent -= bite; 234*60903Sbostic bite = min(exponent, 254); 235*60903Sbostic *pos++ = nweights[or_sign ? 254-bite : bite]; 236*60903Sbostic } while (bite == 254); 237*60903Sbostic } 238*60903Sbostic C_TENS = or_sign ? OFF_NTENS : OFF_TENS; 239*60903Sbostic for (; line < lineend; line++) { 240*60903Sbostic if (*line >= '0' && *line <= '9') { 241*60903Sbostic if (parity) { 242*60903Sbostic *pos++ = C_TENS[lastvalue] + (or_sign ? - *line 243*60903Sbostic : *line); 244*60903Sbostic if (pos == bufend) 245*60903Sbostic return (NULL); 246*60903Sbostic if (*line != '0' || lastvalue != '0') 247*60903Sbostic nonzero = pos; 248*60903Sbostic } else 249*60903Sbostic lastvalue = *line; 250*60903Sbostic parity ^= 1; 251*60903Sbostic } else if(*line == DECIMAL) { 252*60903Sbostic if(!expincr) /* a decimal already occurred once */ 253*60903Sbostic break; 254*60903Sbostic expincr = 0; 255*60903Sbostic } else 256*60903Sbostic break; 257*60903Sbostic } 258*60903Sbostic if (parity && lastvalue != '0') { 259*60903Sbostic *pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' : 260*60903Sbostic OFF_TENS[lastvalue] + '0'; 261*60903Sbostic } else 262*60903Sbostic pos = nonzero; 263*60903Sbostic if (pos > bufend-1) 264*60903Sbostic return (NULL); 265*60903Sbostic *pos++ = or_sign ? nweights[254] : nweights[0]; 266*60903Sbostic return (pos); 267*60903Sbostic } 268*60903Sbostic 269*60903Sbostic /* This forces a gap around the record delimiter 270*60903Sbostic * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255)); 271*60903Sbostic * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0)) 272*60903Sbostic */ 273*60903Sbostic void 274*60903Sbostic num_init() 275*60903Sbostic { 276*60903Sbostic int i; 277*60903Sbostic TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0'; 278*60903Sbostic NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0'; 279*60903Sbostic OFF_TENS = TENS - '0'; 280*60903Sbostic OFF_NTENS = NEGTENS - '0'; 281*60903Sbostic for (i = 1; i < 10; i++) { 282*60903Sbostic TENS[i] = TENS[i-1] + 10; 283*60903Sbostic NEGTENS[i] = NEGTENS[i-1] - 10; 284*60903Sbostic } 285*60903Sbostic for (i = 0; i < REC_D; i++) { 286*60903Sbostic fnum[i] = i; 287*60903Sbostic rnum[255-i] = i; 288*60903Sbostic } 289*60903Sbostic for (i = REC_D; i <255; i++) { 290*60903Sbostic fnum[i] = i+1; 291*60903Sbostic rnum[255-i] = i-1; 292*60903Sbostic } 293*60903Sbostic } 294