1 /* $OpenBSD: parse.c,v 1.16 2007/03/20 03:50:39 tedu Exp $ */ 2 /* $NetBSD: parse.c,v 1.12 2001/12/07 13:37:39 bjh21 Exp $ */ 3 4 /* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #ifndef lint 34 /*static char sccsid[] = "from: @(#)parse.c 5.6 (Berkeley) 3/9/91";*/ 35 static char rcsid[] = "$OpenBSD: parse.c,v 1.16 2007/03/20 03:50:39 tedu Exp $"; 36 #endif /* not lint */ 37 38 #include <sys/types.h> 39 #include <sys/file.h> 40 41 #include <ctype.h> 42 #include <err.h> 43 #include <errno.h> 44 #include <fcntl.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <string.h> 48 49 #include "hexdump.h" 50 51 FU *endfu; /* format at end-of-data */ 52 53 void 54 addfile(char *name) 55 { 56 FILE *fp; 57 size_t len; 58 char *buf, *lbuf, *p; 59 60 if ((fp = fopen(name, "r")) == NULL) 61 err(1, "fopen %s", name); 62 63 lbuf = NULL; 64 while ((buf = fgetln(fp, &len))) { 65 if (buf[len - 1] == '\n') 66 buf[len - 1] = '\0'; 67 else { 68 /* EOF without EOL, copy and add the NUL */ 69 if ((lbuf = malloc(len + 1)) == NULL) 70 err(1, NULL); 71 memcpy(lbuf, buf, len); 72 lbuf[len] = '\0'; 73 buf = lbuf; 74 } 75 for (p = buf; isspace((unsigned char)*p); ++p); 76 if (!*p || *p == '#') 77 continue; 78 add(p); 79 } 80 free(lbuf); 81 (void)fclose(fp); 82 } 83 84 void 85 add(const char *fmt) 86 { 87 const char *p; 88 static FS **nextfs; 89 FS *tfs; 90 FU *tfu, **nextfu; 91 const char *savep; 92 93 /* start new linked list of format units */ 94 tfs = emalloc(sizeof(FS)); 95 if (!fshead) 96 fshead = tfs; 97 else 98 *nextfs = tfs; 99 nextfs = &tfs->nextfs; 100 nextfu = &tfs->nextfu; 101 102 /* take the format string and break it up into format units */ 103 for (p = fmt;;) { 104 /* skip leading white space */ 105 for (; isspace((unsigned char)*p); ++p); 106 if (!*p) 107 break; 108 109 /* allocate a new format unit and link it in */ 110 tfu = emalloc(sizeof(FU)); 111 *nextfu = tfu; 112 nextfu = &tfu->nextfu; 113 tfu->reps = 1; 114 115 /* if leading digit, repetition count */ 116 if (isdigit((unsigned char)*p)) { 117 for (savep = p; isdigit((unsigned char)*p); ++p); 118 if (!isspace((unsigned char)*p) && *p != '/') 119 badfmt(fmt); 120 /* may overwrite either white space or slash */ 121 tfu->reps = atoi(savep); 122 tfu->flags = F_SETREP; 123 /* skip trailing white space */ 124 for (++p; isspace((unsigned char)*p); ++p); 125 } 126 127 /* skip slash and trailing white space */ 128 if (*p == '/') 129 while (isspace((unsigned char)*++p)); 130 131 /* byte count */ 132 if (isdigit((unsigned char)*p)) { 133 for (savep = p; isdigit((unsigned char)*p); ++p); 134 if (!isspace((unsigned char)*p)) 135 badfmt(fmt); 136 tfu->bcnt = atoi(savep); 137 /* skip trailing white space */ 138 for (++p; isspace((unsigned char)*p); ++p); 139 } 140 141 /* format */ 142 if (*p != '"') 143 badfmt(fmt); 144 for (savep = ++p; *p != '"';) 145 if (*p++ == 0) 146 badfmt(fmt); 147 if (!(tfu->fmt = malloc(p - savep + 1))) 148 nomem(); 149 (void) strncpy(tfu->fmt, savep, p - savep); 150 tfu->fmt[p - savep] = '\0'; 151 escape(tfu->fmt); 152 p++; 153 } 154 } 155 156 static const char *spec = ".#-+ 0123456789"; 157 158 int 159 size(FS *fs) 160 { 161 FU *fu; 162 int bcnt, cursize; 163 char *fmt; 164 int prec; 165 166 /* figure out the data block size needed for each format unit */ 167 for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) { 168 if (fu->bcnt) { 169 cursize += fu->bcnt * fu->reps; 170 continue; 171 } 172 for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) { 173 if (*fmt != '%') 174 continue; 175 /* 176 * skip any special chars -- save precision in 177 * case it's a %s format. 178 */ 179 while (*++fmt && strchr(spec + 1, *fmt)); 180 if (*fmt == '.' && isdigit((unsigned char)*++fmt)) { 181 prec = atoi(fmt); 182 while (isdigit((unsigned char)*++fmt)); 183 } 184 switch(*fmt) { 185 case 'c': 186 bcnt += 1; 187 break; 188 case 'd': case 'i': case 'o': case 'u': 189 case 'x': case 'X': 190 bcnt += 4; 191 break; 192 case 'e': case 'E': case 'f': case 'g': case 'G': 193 bcnt += 8; 194 break; 195 case 's': 196 bcnt += prec; 197 break; 198 case '_': 199 switch(*++fmt) { 200 case 'c': case 'p': case 'u': 201 bcnt += 1; 202 break; 203 } 204 } 205 } 206 cursize += bcnt * fu->reps; 207 } 208 return (cursize); 209 } 210 211 void 212 rewrite(FS *fs) 213 { 214 enum { NOTOKAY, USEBCNT, USEPREC } sokay; 215 PR *pr, **nextpr; 216 FU *fu; 217 char *p1, *p2; 218 char savech, *fmtp, cs[3]; 219 int nconv, prec; 220 size_t len; 221 222 nextpr = NULL; 223 prec = 0; 224 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 225 /* 226 * Break each format unit into print units; each conversion 227 * character gets its own. 228 */ 229 for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) { 230 pr = emalloc(sizeof(PR)); 231 if (!fu->nextpr) 232 fu->nextpr = pr; 233 else 234 *nextpr = pr; 235 236 /* Skip preceding text and up to the next % sign. */ 237 for (p1 = fmtp; *p1 && *p1 != '%'; ++p1); 238 239 /* Only text in the string. */ 240 if (!*p1) { 241 pr->fmt = fmtp; 242 pr->flags = F_TEXT; 243 break; 244 } 245 246 /* 247 * Get precision for %s -- if have a byte count, don't 248 * need it. 249 */ 250 if (fu->bcnt) { 251 sokay = USEBCNT; 252 /* Skip to conversion character. */ 253 for (++p1; *p1 && strchr(spec, *p1); ++p1); 254 } else { 255 /* Skip any special chars, field width. */ 256 while (*++p1 && strchr(spec + 1, *p1)); 257 if (*p1 == '.' && 258 isdigit((unsigned char)*++p1)) { 259 sokay = USEPREC; 260 prec = atoi(p1); 261 while (isdigit((unsigned char)*++p1)) 262 continue; 263 } else 264 sokay = NOTOKAY; 265 } 266 267 p2 = *p1 ? p1 + 1 : p1; /* Set end pointer. */ 268 cs[0] = *p1; /* Set conversion string. */ 269 cs[1] = '\0'; 270 271 /* 272 * Figure out the byte count for each conversion; 273 * rewrite the format as necessary, set up blank- 274 * padding for end of data. 275 */ 276 switch(cs[0]) { 277 case 'c': 278 pr->flags = F_CHAR; 279 switch(fu->bcnt) { 280 case 0: case 1: 281 pr->bcnt = 1; 282 break; 283 default: 284 p1[1] = '\0'; 285 badcnt(p1); 286 } 287 break; 288 case 'd': case 'i': 289 case 'o': case 'u': case 'x': case 'X': 290 if (cs[0] == 'd' || cs[0] == 'i') 291 pr->flags = F_INT; 292 else 293 pr->flags = F_UINT; 294 295 cs[2] = '\0'; 296 cs[1] = cs[0]; 297 cs[0] = 'q'; 298 switch(fu->bcnt) { 299 case 0: case 4: 300 pr->bcnt = 4; 301 break; 302 case 1: 303 pr->bcnt = 1; 304 break; 305 case 2: 306 pr->bcnt = 2; 307 break; 308 case 8: 309 pr->bcnt = 8; 310 break; 311 default: 312 p1[1] = '\0'; 313 badcnt(p1); 314 } 315 break; 316 case 'e': case 'E': case 'f': case 'g': case 'G': 317 pr->flags = F_DBL; 318 switch(fu->bcnt) { 319 case 0: case 8: 320 pr->bcnt = 8; 321 break; 322 case 4: 323 pr->bcnt = 4; 324 break; 325 default: 326 p1[1] = '\0'; 327 badcnt(p1); 328 } 329 break; 330 case 's': 331 pr->flags = F_STR; 332 switch(sokay) { 333 case NOTOKAY: 334 badsfmt(); 335 case USEBCNT: 336 pr->bcnt = fu->bcnt; 337 break; 338 case USEPREC: 339 pr->bcnt = prec; 340 break; 341 } 342 break; 343 case '_': 344 ++p2; 345 switch(p1[1]) { 346 case 'A': 347 endfu = fu; 348 fu->flags |= F_IGNORE; 349 /* FALLTHROUGH */ 350 case 'a': 351 pr->flags = F_ADDRESS; 352 ++p2; 353 switch(p1[2]) { 354 case 'd': case 'o': case'x': 355 cs[0] = 'q'; 356 cs[1] = p1[2]; 357 cs[2] = '\0'; 358 break; 359 default: 360 if (p1[2]) 361 p1[3] = '\0'; 362 badconv(p1); 363 } 364 break; 365 case 'c': 366 case 'p': 367 case 'u': 368 if (p1[1] == 'c') { 369 pr->flags = F_C; 370 /* cs[0] = 'c'; set in conv_c */ 371 } else if (p1[1] == 'p') { 372 pr->flags = F_P; 373 cs[0] = 'c'; 374 } else { 375 pr->flags = F_U; 376 /* cs[0] = 'c'; set in conv_u */ 377 } 378 379 switch(fu->bcnt) { 380 case 0: case 1: 381 pr->bcnt = 1; 382 break; 383 default: 384 p1[2] = '\0'; 385 badcnt(p1); 386 } 387 break; 388 default: 389 if (p1[1]) 390 p1[2] = '\0'; 391 badconv(p1); 392 } 393 break; 394 default: 395 if (cs[0]) 396 p1[1] = '\0'; 397 badconv(p1); 398 } 399 400 /* 401 * Copy to PR format string, set conversion character 402 * pointer, update original. 403 */ 404 savech = *p2; 405 p1[0] = '\0'; 406 len = strlen(fmtp) + strlen(cs) + 1; 407 pr->fmt = emalloc(len); 408 snprintf(pr->fmt, len, "%s%s", fmtp, cs); 409 *p2 = savech; 410 pr->cchar = pr->fmt + (p1 - fmtp); 411 fmtp = p2; 412 413 /* Only one conversion character if byte count. */ 414 if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++) 415 errx(1, 416 "byte count with multiple conversion characters"); 417 } 418 /* 419 * If format unit byte count not specified, figure it out 420 * so can adjust rep count later. 421 */ 422 if (!fu->bcnt) 423 for (pr = fu->nextpr; pr; pr = pr->nextpr) 424 fu->bcnt += pr->bcnt; 425 } 426 /* 427 * If the format string interprets any data at all, and it's 428 * not the same as the blocksize, and its last format unit 429 * interprets any data at all, and has no iteration count, 430 * repeat it as necessary. 431 * 432 * If, rep count is greater than 1, no trailing whitespace 433 * gets output from the last iteration of the format unit. 434 */ 435 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 436 if (!fu->nextfu && fs->bcnt < blocksize && 437 !(fu->flags&F_SETREP) && fu->bcnt) 438 fu->reps += (blocksize - fs->bcnt) / fu->bcnt; 439 if (fu->reps > 1) { 440 if (!fu->nextpr) 441 break; 442 for (pr = fu->nextpr;; pr = pr->nextpr) 443 if (!pr->nextpr) 444 break; 445 for (p1 = pr->fmt, p2 = NULL; *p1; ++p1) 446 p2 = isspace((unsigned char)*p1) ? p1 : NULL; 447 if (p2) 448 pr->nospace = p2; 449 } 450 } 451 #ifdef DEBUG 452 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 453 (void)printf("fmt:"); 454 for (pr = fu->nextpr; pr; pr = pr->nextpr) 455 (void)printf(" {%s}", pr->fmt); 456 (void)printf("\n"); 457 } 458 #endif 459 } 460 461 void 462 escape(char *p1) 463 { 464 char *p2; 465 466 /* alphabetic escape sequences have to be done in place */ 467 for (p2 = p1;; ++p1, ++p2) { 468 if (!*p1) { 469 *p2 = *p1; 470 break; 471 } 472 if (*p1 == '\\') { 473 switch(*++p1) { 474 case '\0': 475 *p2++ = '\\'; 476 *p2 = '\0'; 477 return; /* incomplete escape sequence */ 478 case 'a': 479 /* *p2 = '\a'; */ 480 *p2 = '\007'; 481 break; 482 case 'b': 483 *p2 = '\b'; 484 break; 485 case 'f': 486 *p2 = '\f'; 487 break; 488 case 'n': 489 *p2 = '\n'; 490 break; 491 case 'r': 492 *p2 = '\r'; 493 break; 494 case 't': 495 *p2 = '\t'; 496 break; 497 case 'v': 498 *p2 = '\v'; 499 break; 500 default: 501 *p2 = *p1; 502 break; 503 } 504 } else 505 *p2 = *p1; 506 } 507 } 508 509 void 510 badcnt(char *s) 511 { 512 errx(1, "%s: bad byte count", s); 513 } 514 515 void 516 badsfmt(void) 517 { 518 errx(1, "%%s: requires a precision or a byte count"); 519 } 520 521 void 522 badfmt(const char *fmt) 523 { 524 errx(1, "\"%s\": bad format", fmt); 525 } 526 527 void 528 badconv(char *ch) 529 { 530 errx(1, "%%%s: bad conversion character", ch); 531 } 532