1 /* $OpenBSD: parse.c,v 1.9 2001/12/30 08:17:32 pvalchev Exp $ */ 2 /* $NetBSD: parse.c,v 1.12 2001/12/07 13:37:39 bjh21 Exp $ */ 3 4 /* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #ifndef lint 38 /*static char sccsid[] = "from: @(#)parse.c 5.6 (Berkeley) 3/9/91";*/ 39 static char rcsid[] = "$OpenBSD: parse.c,v 1.9 2001/12/30 08:17:32 pvalchev Exp $"; 40 #endif /* not lint */ 41 42 #include <sys/types.h> 43 #include <sys/file.h> 44 45 #include <ctype.h> 46 #include <err.h> 47 #include <errno.h> 48 #include <fcntl.h> 49 #include <stdio.h> 50 #include <stdlib.h> 51 #include <string.h> 52 53 #include "hexdump.h" 54 55 FU *endfu; /* format at end-of-data */ 56 57 void 58 addfile(name) 59 char *name; 60 { 61 char *p; 62 FILE *fp; 63 int ch; 64 char buf[2048 + 1]; 65 66 if ((fp = fopen(name, "r")) == NULL) 67 err(1, "fopen %s", name); 68 while (fgets(buf, sizeof(buf), fp)) { 69 if (!(p = strchr(buf, '\n'))) { 70 warnx("line too long."); 71 while ((ch = getchar()) != '\n' && ch != EOF); 72 continue; 73 } 74 *p = '\0'; 75 for (p = buf; *p && isspace((unsigned char)*p); ++p); 76 if (!*p || *p == '#') 77 continue; 78 add(p); 79 } 80 (void)fclose(fp); 81 } 82 83 void 84 add(fmt) 85 const char *fmt; 86 { 87 const char *p; 88 static FS **nextfs; 89 FS *tfs; 90 FU *tfu, **nextfu; 91 const char *savep; 92 93 /* start new linked list of format units */ 94 tfs = emalloc(sizeof(FS)); 95 if (!fshead) 96 fshead = tfs; 97 else 98 *nextfs = tfs; 99 nextfs = &tfs->nextfs; 100 nextfu = &tfs->nextfu; 101 102 /* take the format string and break it up into format units */ 103 for (p = fmt;;) { 104 /* skip leading white space */ 105 for (; isspace((unsigned char)*p); ++p); 106 if (!*p) 107 break; 108 109 /* allocate a new format unit and link it in */ 110 tfu = emalloc(sizeof(FU)); 111 *nextfu = tfu; 112 nextfu = &tfu->nextfu; 113 tfu->reps = 1; 114 115 /* if leading digit, repetition count */ 116 if (isdigit((unsigned char)*p)) { 117 for (savep = p; isdigit((unsigned char)*p); ++p); 118 if (!isspace((unsigned char)*p) && *p != '/') 119 badfmt(fmt); 120 /* may overwrite either white space or slash */ 121 tfu->reps = atoi(savep); 122 tfu->flags = F_SETREP; 123 /* skip trailing white space */ 124 for (++p; isspace((unsigned char)*p); ++p); 125 } 126 127 /* skip slash and trailing white space */ 128 if (*p == '/') 129 while (isspace((unsigned char)*++p)); 130 131 /* byte count */ 132 if (isdigit((unsigned char)*p)) { 133 for (savep = p; isdigit((unsigned char)*p); ++p); 134 if (!isspace((unsigned char)*p)) 135 badfmt(fmt); 136 tfu->bcnt = atoi(savep); 137 /* skip trailing white space */ 138 for (++p; isspace((unsigned char)*p); ++p); 139 } 140 141 /* format */ 142 if (*p != '"') 143 badfmt(fmt); 144 for (savep = ++p; *p != '"';) 145 if (*p++ == 0) 146 badfmt(fmt); 147 if (!(tfu->fmt = malloc(p - savep + 1))) 148 nomem(); 149 (void) strncpy(tfu->fmt, savep, p - savep); 150 tfu->fmt[p - savep] = '\0'; 151 escape(tfu->fmt); 152 p++; 153 } 154 } 155 156 static const char *spec = ".#-+ 0123456789"; 157 158 int 159 size(fs) 160 FS *fs; 161 { 162 FU *fu; 163 int bcnt, cursize; 164 char *fmt; 165 int prec; 166 167 /* figure out the data block size needed for each format unit */ 168 for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) { 169 if (fu->bcnt) { 170 cursize += fu->bcnt * fu->reps; 171 continue; 172 } 173 for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) { 174 if (*fmt != '%') 175 continue; 176 /* 177 * skip any special chars -- save precision in 178 * case it's a %s format. 179 */ 180 while (strchr(spec + 1, *++fmt)); 181 if (*fmt == '.' && isdigit((unsigned char)*++fmt)) { 182 prec = atoi(fmt); 183 while (isdigit((unsigned char)*++fmt)); 184 } 185 switch(*fmt) { 186 case 'c': 187 bcnt += 1; 188 break; 189 case 'd': case 'i': case 'o': case 'u': 190 case 'x': case 'X': 191 bcnt += 4; 192 break; 193 case 'e': case 'E': case 'f': case 'g': case 'G': 194 bcnt += 8; 195 break; 196 case 's': 197 bcnt += prec; 198 break; 199 case '_': 200 switch(*++fmt) { 201 case 'c': case 'p': case 'u': 202 bcnt += 1; 203 break; 204 } 205 } 206 } 207 cursize += bcnt * fu->reps; 208 } 209 return (cursize); 210 } 211 212 void 213 rewrite(fs) 214 FS *fs; 215 { 216 enum { NOTOKAY, USEBCNT, USEPREC } sokay; 217 PR *pr, **nextpr; 218 FU *fu; 219 char *p1, *p2; 220 char savech, *fmtp, cs[3]; 221 int nconv, prec; 222 223 nextpr = NULL; 224 prec = 0; 225 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 226 /* 227 * Break each format unit into print units; each conversion 228 * character gets its own. 229 */ 230 for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) { 231 pr = emalloc(sizeof(PR)); 232 if (!fu->nextpr) 233 fu->nextpr = pr; 234 else 235 *nextpr = pr; 236 237 /* Skip preceding text and up to the next % sign. */ 238 for (p1 = fmtp; *p1 && *p1 != '%'; ++p1); 239 240 /* Only text in the string. */ 241 if (!*p1) { 242 pr->fmt = fmtp; 243 pr->flags = F_TEXT; 244 break; 245 } 246 247 /* 248 * Get precision for %s -- if have a byte count, don't 249 * need it. 250 */ 251 if (fu->bcnt) { 252 sokay = USEBCNT; 253 /* Skip to conversion character. */ 254 for (++p1; strchr(spec, *p1); ++p1); 255 } else { 256 /* Skip any special chars, field width. */ 257 while (strchr(spec + 1, *++p1)); 258 if (*p1 == '.' && 259 isdigit((unsigned char)*++p1)) { 260 sokay = USEPREC; 261 prec = atoi(p1); 262 while (isdigit((unsigned char)*++p1)) 263 continue; 264 } else 265 sokay = NOTOKAY; 266 } 267 268 p2 = p1 + 1; /* Set end pointer. */ 269 cs[0] = *p1; /* Set conversion string. */ 270 cs[1] = '\0'; 271 272 /* 273 * Figure out the byte count for each conversion; 274 * rewrite the format as necessary, set up blank- 275 * padding for end of data. 276 */ 277 switch(cs[0]) { 278 case 'c': 279 pr->flags = F_CHAR; 280 switch(fu->bcnt) { 281 case 0: case 1: 282 pr->bcnt = 1; 283 break; 284 default: 285 p1[1] = '\0'; 286 badcnt(p1); 287 } 288 break; 289 case 'd': case 'i': 290 pr->flags = F_INT; 291 goto isint; 292 case 'o': case 'u': case 'x': case 'X': 293 pr->flags = F_UINT; 294 isint: cs[2] = '\0'; 295 cs[1] = cs[0]; 296 cs[0] = 'q'; 297 switch(fu->bcnt) { 298 case 0: case 4: 299 pr->bcnt = 4; 300 break; 301 case 1: 302 pr->bcnt = 1; 303 break; 304 case 2: 305 pr->bcnt = 2; 306 break; 307 case 8: 308 pr->bcnt = 8; 309 break; 310 default: 311 p1[1] = '\0'; 312 badcnt(p1); 313 } 314 break; 315 case 'e': case 'E': case 'f': case 'g': case 'G': 316 pr->flags = F_DBL; 317 switch(fu->bcnt) { 318 case 0: case 8: 319 pr->bcnt = 8; 320 break; 321 case 4: 322 pr->bcnt = 4; 323 break; 324 default: 325 p1[1] = '\0'; 326 badcnt(p1); 327 } 328 break; 329 case 's': 330 pr->flags = F_STR; 331 switch(sokay) { 332 case NOTOKAY: 333 badsfmt(); 334 case USEBCNT: 335 pr->bcnt = fu->bcnt; 336 break; 337 case USEPREC: 338 pr->bcnt = prec; 339 break; 340 } 341 break; 342 case '_': 343 ++p2; 344 switch(p1[1]) { 345 case 'A': 346 endfu = fu; 347 fu->flags |= F_IGNORE; 348 /* FALLTHROUGH */ 349 case 'a': 350 pr->flags = F_ADDRESS; 351 ++p2; 352 switch(p1[2]) { 353 case 'd': case 'o': case'x': 354 cs[0] = 'q'; 355 cs[1] = p1[2]; 356 cs[2] = '\0'; 357 break; 358 default: 359 p1[3] = '\0'; 360 badconv(p1); 361 } 362 break; 363 case 'c': 364 pr->flags = F_C; 365 /* cs[0] = 'c'; set in conv_c */ 366 goto isint2; 367 case 'p': 368 pr->flags = F_P; 369 cs[0] = 'c'; 370 goto isint2; 371 case 'u': 372 pr->flags = F_U; 373 /* cs[0] = 'c'; set in conv_u */ 374 isint2: switch(fu->bcnt) { 375 case 0: case 1: 376 pr->bcnt = 1; 377 break; 378 default: 379 p1[2] = '\0'; 380 badcnt(p1); 381 } 382 break; 383 default: 384 p1[2] = '\0'; 385 badconv(p1); 386 } 387 break; 388 default: 389 p1[1] = '\0'; 390 badconv(p1); 391 } 392 393 /* 394 * Copy to PR format string, set conversion character 395 * pointer, update original. 396 */ 397 savech = *p2; 398 p1[0] = '\0'; 399 pr->fmt = emalloc(strlen(fmtp) + strlen(cs) + 1); 400 (void)strcpy(pr->fmt, fmtp); 401 (void)strcat(pr->fmt, cs); 402 *p2 = savech; 403 pr->cchar = pr->fmt + (p1 - fmtp); 404 fmtp = p2; 405 406 /* Only one conversion character if byte count. */ 407 if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++) 408 errx(1, 409 "byte count with multiple conversion characters"); 410 } 411 /* 412 * If format unit byte count not specified, figure it out 413 * so can adjust rep count later. 414 */ 415 if (!fu->bcnt) 416 for (pr = fu->nextpr; pr; pr = pr->nextpr) 417 fu->bcnt += pr->bcnt; 418 } 419 /* 420 * If the format string interprets any data at all, and it's 421 * not the same as the blocksize, and its last format unit 422 * interprets any data at all, and has no iteration count, 423 * repeat it as necessary. 424 * 425 * If, rep count is greater than 1, no trailing whitespace 426 * gets output from the last iteration of the format unit. 427 */ 428 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 429 if (!fu->nextfu && fs->bcnt < blocksize && 430 !(fu->flags&F_SETREP) && fu->bcnt) 431 fu->reps += (blocksize - fs->bcnt) / fu->bcnt; 432 if (fu->reps > 1) { 433 for (pr = fu->nextpr;; pr = pr->nextpr) 434 if (!pr->nextpr) 435 break; 436 for (p1 = pr->fmt, p2 = NULL; *p1; ++p1) 437 p2 = isspace((unsigned char)*p1) ? p1 : NULL; 438 if (p2) 439 pr->nospace = p2; 440 } 441 } 442 #ifdef DEBUG 443 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 444 (void)printf("fmt:"); 445 for (pr = fu->nextpr; pr; pr = pr->nextpr) 446 (void)printf(" {%s}", pr->fmt); 447 (void)printf("\n"); 448 } 449 #endif 450 } 451 452 void 453 escape(p1) 454 char *p1; 455 { 456 char *p2; 457 458 /* alphabetic escape sequences have to be done in place */ 459 for (p2 = p1;; ++p1, ++p2) { 460 if (!*p1) { 461 *p2 = *p1; 462 break; 463 } 464 if (*p1 == '\\') 465 switch(*++p1) { 466 case 'a': 467 /* *p2 = '\a'; */ 468 *p2 = '\007'; 469 break; 470 case 'b': 471 *p2 = '\b'; 472 break; 473 case 'f': 474 *p2 = '\f'; 475 break; 476 case 'n': 477 *p2 = '\n'; 478 break; 479 case 'r': 480 *p2 = '\r'; 481 break; 482 case 't': 483 *p2 = '\t'; 484 break; 485 case 'v': 486 *p2 = '\v'; 487 break; 488 default: 489 *p2 = *p1; 490 break; 491 } 492 } 493 } 494 495 void 496 badcnt(s) 497 char *s; 498 { 499 errx(1, "%s: bad byte count", s); 500 } 501 502 void 503 badsfmt() 504 { 505 errx(1, "%%s: requires a precision or a byte count\n"); 506 } 507 508 void 509 badfmt(fmt) 510 const char *fmt; 511 { 512 errx(1, "\"%s\": bad format\n", fmt); 513 } 514 515 void 516 badconv(ch) 517 char *ch; 518 { 519 errx(1, "%%%s: bad conversion character\n", ch); 520 } 521