1 /* $NetBSD: fields.c,v 1.18 2004/03/14 21:12:14 heas Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /*- 40 * Copyright (c) 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * Peter McIlroy. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. Neither the name of the University nor the names of its contributors 55 * may be used to endorse or promote products derived from this software 56 * without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 */ 70 71 /* Subroutines to generate sort keys. */ 72 73 #include "sort.h" 74 75 #ifndef lint 76 __RCSID("$NetBSD: fields.c,v 1.18 2004/03/14 21:12:14 heas Exp $"); 77 __SCCSID("@(#)fields.c 8.1 (Berkeley) 6/6/93"); 78 #endif /* not lint */ 79 80 #define SKIP_BLANKS(ptr) { \ 81 if (BLANK & d_mask[*(ptr)]) \ 82 while (BLANK & d_mask[*(++(ptr))]); \ 83 } 84 85 #define NEXTCOL(pos) { \ 86 if (!SEP_FLAG) \ 87 while (BLANK & l_d_mask[*(++pos)]); \ 88 while ((*(pos+1) != '\0') && !((FLD_D | REC_D_F) & l_d_mask[*++pos]));\ 89 } 90 91 static u_char *enterfield __P((u_char *, u_char *, struct field *, int)); 92 static u_char *number __P((u_char *, u_char *, u_char *, u_char *, int)); 93 94 #define DECIMAL '.' 95 #define OFFSET 128 96 97 u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */ 98 u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */ 99 u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */ 100 u_char fnum[NBINS], rnum[NBINS]; 101 102 /* 103 * constructs sort key with leading recheader, followed by the key, 104 * followed by the original line. 105 */ 106 length_t 107 enterkey(keybuf, line, size, fieldtable) 108 RECHEADER *keybuf; /* pointer to start of key */ 109 DBT *line; 110 int size; 111 struct field fieldtable[]; 112 { 113 int i; 114 u_char *l_d_mask; 115 u_char *lineend, *pos; 116 u_char *endkey, *keypos; 117 struct coldesc *clpos; 118 int col = 1; 119 struct field *ftpos; 120 l_d_mask = d_mask; 121 pos = (u_char *) line->data - 1; 122 lineend = (u_char *) line->data + line->size-1; 123 /* don't include rec_delimiter */ 124 125 for (i = 0; i < ncols; i++) { 126 clpos = clist + i; 127 for (; (col < clpos->num) && (pos < lineend); col++) { 128 NEXTCOL(pos); 129 } 130 if (pos >= lineend) 131 break; 132 clpos->start = SEP_FLAG ? pos + 1 : pos; 133 NEXTCOL(pos); 134 clpos->end = pos; 135 col++; 136 if (pos >= lineend) { 137 clpos->end = lineend; 138 i++; 139 break; 140 } 141 } 142 for (; i <= ncols; i++) 143 clist[i].start = clist[i].end = lineend; 144 if (clist[0].start < (u_char *) line->data) 145 clist[0].start++; 146 147 keypos = keybuf->data; 148 endkey = (u_char *) keybuf + size - line->size; 149 for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++) 150 if ((keypos = enterfield(keypos, endkey, ftpos, 151 fieldtable->flags)) == NULL) 152 return (1); 153 154 keybuf->offset = keypos - keybuf->data; 155 keybuf->length = keybuf->offset + line->size; 156 if (keybuf->length + sizeof(TRECHEADER) > size) { 157 /* line too long for buffer */ 158 return (1); 159 } 160 161 /* 162 * Make [s]radixsort() only sort by relevant part of key if: 163 * 1. we want to choose unique items by relevant field[s] 164 * 2. we want stable sort and so the items should be sorted only by 165 * the relevant field[s] 166 */ 167 if (UNIQUE || (stable_sort && keybuf->offset < line->size)) 168 keypos[-1] = REC_D; 169 170 memcpy(keybuf->data + keybuf->offset, line->data, line->size); 171 return (0); 172 } 173 174 /* 175 * constructs a field (as defined by -k) within a key 176 */ 177 static u_char * 178 enterfield(tablepos, endkey, cur_fld, gflags) 179 struct field *cur_fld; 180 u_char *tablepos, *endkey; 181 int gflags; 182 { 183 u_char *start, *end, *lineend, *mask, *lweight; 184 struct column icol, tcol; 185 u_int flags; 186 u_int Rflag; 187 188 icol = cur_fld->icol; 189 tcol = cur_fld->tcol; 190 flags = cur_fld->flags; 191 start = icol.p->start; 192 lineend = clist[ncols].end; 193 if (flags & BI) 194 SKIP_BLANKS(start); 195 start += icol.indent; 196 start = min(start, lineend); 197 198 if (!tcol.num) 199 end = lineend; 200 else { 201 if (tcol.indent) { 202 end = tcol.p->start; 203 if (flags & BT) 204 SKIP_BLANKS(end); 205 end += tcol.indent; 206 end = min(end, lineend); 207 } else 208 end = tcol.p->end; 209 } 210 211 if (flags & N) { 212 Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0; 213 return number(tablepos, endkey, start, end, Rflag); 214 } 215 216 mask = cur_fld->mask; 217 lweight = cur_fld->weights; 218 for (; start < end; start++) 219 if (mask[*start]) { 220 if (*start <= 1) { 221 if (tablepos+2 >= endkey) 222 return (NULL); 223 *tablepos++ = lweight[1]; 224 *tablepos++ = lweight[*start ? 2 : 1]; 225 } else { 226 if (tablepos+1 >= endkey) 227 return (NULL); 228 *tablepos++ = lweight[*start]; 229 } 230 } 231 *tablepos++ = lweight[0]; 232 return (tablepos == endkey ? NULL : tablepos); 233 } 234 235 /* Uses the first bin to assign sign, expsign, 0, and the first 236 * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ). 237 * When sorting in forward order: 238 * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128; 239 * else use (0-99)->(2-102). 240 * If the exponent is >=61, use another byte for each additional 253 241 * in the exponent. Cutoff is at 567. 242 * To avoid confusing the exponent and the mantissa, use a field delimiter 243 * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the 244 * only time a field delimiter can come in that position. 245 * Reverse order is done analagously. 246 */ 247 248 static u_char * 249 number(pos, bufend, line, lineend, Rflag) 250 u_char *line, *pos, *bufend, *lineend; 251 int Rflag; 252 { 253 int or_sign, parity = 0; 254 int expincr = 1, exponent = -1; 255 int bite, expsign = 1, sign = 1, zeroskip = 0; 256 u_char lastvalue='0', *nonzero=NULL, *tline, *C_TENS; 257 u_char *nweights; 258 259 if (Rflag) 260 nweights = rnum; 261 else 262 nweights = fnum; 263 if (pos > bufend - 8) 264 return (NULL); 265 /* 266 * or_sign sets the sort direction: 267 * (-r: +/-)(sign: +/-)(expsign: +/-) 268 */ 269 or_sign = sign ^ expsign ^ Rflag; 270 SKIP_BLANKS(line); 271 if (*line == '-') { /* set the sign */ 272 or_sign ^= 1; 273 sign = 0; 274 line++; 275 } 276 /* eat initial zeroes */ 277 for (; *line == '0' && line < lineend; line++) 278 zeroskip = 1; 279 /* calculate exponents < 0 */ 280 if (*line == DECIMAL) { 281 exponent = 1; 282 while (*++line == '0' && line < lineend) 283 exponent++; 284 expincr = 0; 285 expsign = 0; 286 } 287 /* next character better be a digit */ 288 if (*line < '1' || *line > '9' || line >= lineend) { 289 /* only exit if we didn't skip any zero number */ 290 if (!zeroskip) { 291 *pos++ = nweights[127]; 292 return (pos); 293 } 294 } 295 if (expincr) { 296 for (tline = line-1; *++tline >= '0' && 297 *tline <= '9' && tline < lineend;) 298 exponent++; 299 } 300 if (exponent > 567) { 301 *pos++ = nweights[sign ? (expsign ? 254 : 128) 302 : (expsign ? 0 : 126)]; 303 warnx("exponent out of bounds"); 304 return (pos); 305 } 306 bite = min(exponent, 61); 307 *pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite) 308 : (expsign ? 64-bite : 64+bite)]; 309 if (bite >= 61) { 310 do { 311 exponent -= bite; 312 bite = min(exponent, 254); 313 *pos++ = nweights[or_sign ? 254-bite : bite]; 314 } while (bite == 254); 315 } 316 C_TENS = or_sign ? OFF_NTENS : OFF_TENS; 317 for (; line < lineend; line++) { 318 if (*line >= '0' && *line <= '9') { 319 if (parity) { 320 *pos++ = C_TENS[lastvalue] + (or_sign ? - *line 321 : *line); 322 if (pos == bufend) 323 return (NULL); 324 if (*line != '0' || lastvalue != '0') 325 nonzero = pos; 326 } else 327 lastvalue = *line; 328 parity ^= 1; 329 } else if (*line == DECIMAL) { 330 if (!expincr) /* a decimal already occurred once */ 331 break; 332 expincr = 0; 333 } else 334 break; 335 } 336 if ((parity && lastvalue != '0') || !nonzero) { 337 *pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' : 338 OFF_TENS[lastvalue] + '0'; 339 } else 340 pos = nonzero; 341 if (pos > bufend-1) 342 return (NULL); 343 *pos++ = or_sign ? nweights[254] : nweights[0]; 344 return (pos); 345 } 346 347 /* This forces a gap around the record delimiter 348 * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255)); 349 * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0)) 350 */ 351 void 352 num_init() 353 { 354 int i; 355 TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0'; 356 NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0'; 357 OFF_TENS = TENS - '0'; 358 OFF_NTENS = NEGTENS - '0'; 359 for (i = 1; i < 10; i++) { 360 TENS[i] = TENS[i - 1] + 10; 361 NEGTENS[i] = NEGTENS[i - 1] - 10; 362 } 363 for (i = 0; i < REC_D; i++) { 364 fnum[i] = i; 365 rnum[255 - i] = i; 366 } 367 for (i = REC_D; i <255; i++) { 368 fnum[i] = i + 1; 369 rnum[255 - i] = i - 1; 370 } 371 } 372