1 /* $NetBSD: fields.c,v 1.19 2008/04/28 20:24:15 martin Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Peter McIlroy. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 /* Subroutines to generate sort keys. */ 65 66 #include "sort.h" 67 68 #ifndef lint 69 __RCSID("$NetBSD: fields.c,v 1.19 2008/04/28 20:24:15 martin Exp $"); 70 __SCCSID("@(#)fields.c 8.1 (Berkeley) 6/6/93"); 71 #endif /* not lint */ 72 73 #define SKIP_BLANKS(ptr) { \ 74 if (BLANK & d_mask[*(ptr)]) \ 75 while (BLANK & d_mask[*(++(ptr))]); \ 76 } 77 78 #define NEXTCOL(pos) { \ 79 if (!SEP_FLAG) \ 80 while (BLANK & l_d_mask[*(++pos)]); \ 81 while ((*(pos+1) != '\0') && !((FLD_D | REC_D_F) & l_d_mask[*++pos]));\ 82 } 83 84 static u_char *enterfield __P((u_char *, u_char *, struct field *, int)); 85 static u_char *number __P((u_char *, u_char *, u_char *, u_char *, int)); 86 87 #define DECIMAL '.' 88 #define OFFSET 128 89 90 u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */ 91 u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */ 92 u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */ 93 u_char fnum[NBINS], rnum[NBINS]; 94 95 /* 96 * constructs sort key with leading recheader, followed by the key, 97 * followed by the original line. 98 */ 99 length_t 100 enterkey(keybuf, line, size, fieldtable) 101 RECHEADER *keybuf; /* pointer to start of key */ 102 DBT *line; 103 int size; 104 struct field fieldtable[]; 105 { 106 int i; 107 u_char *l_d_mask; 108 u_char *lineend, *pos; 109 u_char *endkey, *keypos; 110 struct coldesc *clpos; 111 int col = 1; 112 struct field *ftpos; 113 l_d_mask = d_mask; 114 pos = (u_char *) line->data - 1; 115 lineend = (u_char *) line->data + line->size-1; 116 /* don't include rec_delimiter */ 117 118 for (i = 0; i < ncols; i++) { 119 clpos = clist + i; 120 for (; (col < clpos->num) && (pos < lineend); col++) { 121 NEXTCOL(pos); 122 } 123 if (pos >= lineend) 124 break; 125 clpos->start = SEP_FLAG ? pos + 1 : pos; 126 NEXTCOL(pos); 127 clpos->end = pos; 128 col++; 129 if (pos >= lineend) { 130 clpos->end = lineend; 131 i++; 132 break; 133 } 134 } 135 for (; i <= ncols; i++) 136 clist[i].start = clist[i].end = lineend; 137 if (clist[0].start < (u_char *) line->data) 138 clist[0].start++; 139 140 keypos = keybuf->data; 141 endkey = (u_char *) keybuf + size - line->size; 142 for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++) 143 if ((keypos = enterfield(keypos, endkey, ftpos, 144 fieldtable->flags)) == NULL) 145 return (1); 146 147 keybuf->offset = keypos - keybuf->data; 148 keybuf->length = keybuf->offset + line->size; 149 if (keybuf->length + sizeof(TRECHEADER) > size) { 150 /* line too long for buffer */ 151 return (1); 152 } 153 154 /* 155 * Make [s]radixsort() only sort by relevant part of key if: 156 * 1. we want to choose unique items by relevant field[s] 157 * 2. we want stable sort and so the items should be sorted only by 158 * the relevant field[s] 159 */ 160 if (UNIQUE || (stable_sort && keybuf->offset < line->size)) 161 keypos[-1] = REC_D; 162 163 memcpy(keybuf->data + keybuf->offset, line->data, line->size); 164 return (0); 165 } 166 167 /* 168 * constructs a field (as defined by -k) within a key 169 */ 170 static u_char * 171 enterfield(tablepos, endkey, cur_fld, gflags) 172 struct field *cur_fld; 173 u_char *tablepos, *endkey; 174 int gflags; 175 { 176 u_char *start, *end, *lineend, *mask, *lweight; 177 struct column icol, tcol; 178 u_int flags; 179 u_int Rflag; 180 181 icol = cur_fld->icol; 182 tcol = cur_fld->tcol; 183 flags = cur_fld->flags; 184 start = icol.p->start; 185 lineend = clist[ncols].end; 186 if (flags & BI) 187 SKIP_BLANKS(start); 188 start += icol.indent; 189 start = min(start, lineend); 190 191 if (!tcol.num) 192 end = lineend; 193 else { 194 if (tcol.indent) { 195 end = tcol.p->start; 196 if (flags & BT) 197 SKIP_BLANKS(end); 198 end += tcol.indent; 199 end = min(end, lineend); 200 } else 201 end = tcol.p->end; 202 } 203 204 if (flags & N) { 205 Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0; 206 return number(tablepos, endkey, start, end, Rflag); 207 } 208 209 mask = cur_fld->mask; 210 lweight = cur_fld->weights; 211 for (; start < end; start++) 212 if (mask[*start]) { 213 if (*start <= 1) { 214 if (tablepos+2 >= endkey) 215 return (NULL); 216 *tablepos++ = lweight[1]; 217 *tablepos++ = lweight[*start ? 2 : 1]; 218 } else { 219 if (tablepos+1 >= endkey) 220 return (NULL); 221 *tablepos++ = lweight[*start]; 222 } 223 } 224 *tablepos++ = lweight[0]; 225 return (tablepos == endkey ? NULL : tablepos); 226 } 227 228 /* Uses the first bin to assign sign, expsign, 0, and the first 229 * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ). 230 * When sorting in forward order: 231 * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128; 232 * else use (0-99)->(2-102). 233 * If the exponent is >=61, use another byte for each additional 253 234 * in the exponent. Cutoff is at 567. 235 * To avoid confusing the exponent and the mantissa, use a field delimiter 236 * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the 237 * only time a field delimiter can come in that position. 238 * Reverse order is done analagously. 239 */ 240 241 static u_char * 242 number(pos, bufend, line, lineend, Rflag) 243 u_char *line, *pos, *bufend, *lineend; 244 int Rflag; 245 { 246 int or_sign, parity = 0; 247 int expincr = 1, exponent = -1; 248 int bite, expsign = 1, sign = 1, zeroskip = 0; 249 u_char lastvalue='0', *nonzero=NULL, *tline, *C_TENS; 250 u_char *nweights; 251 252 if (Rflag) 253 nweights = rnum; 254 else 255 nweights = fnum; 256 if (pos > bufend - 8) 257 return (NULL); 258 /* 259 * or_sign sets the sort direction: 260 * (-r: +/-)(sign: +/-)(expsign: +/-) 261 */ 262 or_sign = sign ^ expsign ^ Rflag; 263 SKIP_BLANKS(line); 264 if (*line == '-') { /* set the sign */ 265 or_sign ^= 1; 266 sign = 0; 267 line++; 268 } 269 /* eat initial zeroes */ 270 for (; *line == '0' && line < lineend; line++) 271 zeroskip = 1; 272 /* calculate exponents < 0 */ 273 if (*line == DECIMAL) { 274 exponent = 1; 275 while (*++line == '0' && line < lineend) 276 exponent++; 277 expincr = 0; 278 expsign = 0; 279 } 280 /* next character better be a digit */ 281 if (*line < '1' || *line > '9' || line >= lineend) { 282 /* only exit if we didn't skip any zero number */ 283 if (!zeroskip) { 284 *pos++ = nweights[127]; 285 return (pos); 286 } 287 } 288 if (expincr) { 289 for (tline = line-1; *++tline >= '0' && 290 *tline <= '9' && tline < lineend;) 291 exponent++; 292 } 293 if (exponent > 567) { 294 *pos++ = nweights[sign ? (expsign ? 254 : 128) 295 : (expsign ? 0 : 126)]; 296 warnx("exponent out of bounds"); 297 return (pos); 298 } 299 bite = min(exponent, 61); 300 *pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite) 301 : (expsign ? 64-bite : 64+bite)]; 302 if (bite >= 61) { 303 do { 304 exponent -= bite; 305 bite = min(exponent, 254); 306 *pos++ = nweights[or_sign ? 254-bite : bite]; 307 } while (bite == 254); 308 } 309 C_TENS = or_sign ? OFF_NTENS : OFF_TENS; 310 for (; line < lineend; line++) { 311 if (*line >= '0' && *line <= '9') { 312 if (parity) { 313 *pos++ = C_TENS[lastvalue] + (or_sign ? - *line 314 : *line); 315 if (pos == bufend) 316 return (NULL); 317 if (*line != '0' || lastvalue != '0') 318 nonzero = pos; 319 } else 320 lastvalue = *line; 321 parity ^= 1; 322 } else if (*line == DECIMAL) { 323 if (!expincr) /* a decimal already occurred once */ 324 break; 325 expincr = 0; 326 } else 327 break; 328 } 329 if ((parity && lastvalue != '0') || !nonzero) { 330 *pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' : 331 OFF_TENS[lastvalue] + '0'; 332 } else 333 pos = nonzero; 334 if (pos > bufend-1) 335 return (NULL); 336 *pos++ = or_sign ? nweights[254] : nweights[0]; 337 return (pos); 338 } 339 340 /* This forces a gap around the record delimiter 341 * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255)); 342 * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0)) 343 */ 344 void 345 num_init() 346 { 347 int i; 348 TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0'; 349 NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0'; 350 OFF_TENS = TENS - '0'; 351 OFF_NTENS = NEGTENS - '0'; 352 for (i = 1; i < 10; i++) { 353 TENS[i] = TENS[i - 1] + 10; 354 NEGTENS[i] = NEGTENS[i - 1] - 10; 355 } 356 for (i = 0; i < REC_D; i++) { 357 fnum[i] = i; 358 rnum[255 - i] = i; 359 } 360 for (i = REC_D; i <255; i++) { 361 fnum[i] = i + 1; 362 rnum[255 - i] = i - 1; 363 } 364 } 365