1 /* $NetBSD: parse.c,v 1.11 2001/02/07 18:32:07 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <sys/cdefs.h> 37 #ifndef lint 38 #if 0 39 static char sccsid[] = "@(#)parse.c 8.1 (Berkeley) 6/6/93"; 40 #else 41 __RCSID("$NetBSD: parse.c,v 1.11 2001/02/07 18:32:07 christos Exp $"); 42 #endif 43 #endif /* not lint */ 44 45 #include <sys/types.h> 46 #include <sys/file.h> 47 48 #include <ctype.h> 49 #include <err.h> 50 #include <errno.h> 51 #include <fcntl.h> 52 #include <stdio.h> 53 #include <stdlib.h> 54 #include <string.h> 55 56 #include "hexdump.h" 57 58 FU *endfu; /* format at end-of-data */ 59 60 void 61 addfile(name) 62 char *name; 63 { 64 char *p; 65 FILE *fp; 66 int ch; 67 char buf[2048 + 1]; 68 69 if ((fp = fopen(name, "r")) == NULL) 70 err(1, "fopen %s", name); 71 while (fgets(buf, sizeof(buf), fp)) { 72 if (!(p = strchr(buf, '\n'))) { 73 warnx("line too long."); 74 while ((ch = getchar()) != '\n' && ch != EOF); 75 continue; 76 } 77 *p = '\0'; 78 for (p = buf; *p && isspace((unsigned char)*p); ++p); 79 if (!*p || *p == '#') 80 continue; 81 add(p); 82 } 83 (void)fclose(fp); 84 } 85 86 void 87 add(fmt) 88 const char *fmt; 89 { 90 const char *p; 91 static FS **nextfs; 92 FS *tfs; 93 FU *tfu, **nextfu; 94 const char *savep; 95 96 /* start new linked list of format units */ 97 tfs = emalloc(sizeof(FS)); 98 if (!fshead) 99 fshead = tfs; 100 else 101 *nextfs = tfs; 102 nextfs = &tfs->nextfs; 103 nextfu = &tfs->nextfu; 104 105 /* take the format string and break it up into format units */ 106 for (p = fmt;;) { 107 /* skip leading white space */ 108 for (; isspace((unsigned char)*p); ++p); 109 if (!*p) 110 break; 111 112 /* allocate a new format unit and link it in */ 113 tfu = emalloc(sizeof(FU)); 114 *nextfu = tfu; 115 nextfu = &tfu->nextfu; 116 tfu->reps = 1; 117 118 /* if leading digit, repetition count */ 119 if (isdigit((unsigned char)*p)) { 120 for (savep = p; isdigit((unsigned char)*p); ++p); 121 if (!isspace((unsigned char)*p) && *p != '/') 122 badfmt(fmt); 123 /* may overwrite either white space or slash */ 124 tfu->reps = atoi(savep); 125 tfu->flags = F_SETREP; 126 /* skip trailing white space */ 127 for (++p; isspace((unsigned char)*p); ++p); 128 } 129 130 /* skip slash and trailing white space */ 131 if (*p == '/') 132 while (isspace((unsigned char)*++p)); 133 134 /* byte count */ 135 if (isdigit((unsigned char)*p)) { 136 for (savep = p; isdigit((unsigned char)*p); ++p); 137 if (!isspace((unsigned char)*p)) 138 badfmt(fmt); 139 tfu->bcnt = atoi(savep); 140 /* skip trailing white space */ 141 for (++p; isspace((unsigned char)*p); ++p); 142 } 143 144 /* format */ 145 if (*p != '"') 146 badfmt(fmt); 147 for (savep = ++p; *p != '"';) 148 if (*p++ == 0) 149 badfmt(fmt); 150 if (!(tfu->fmt = malloc(p - savep + 1))) 151 nomem(); 152 (void) strncpy(tfu->fmt, savep, p - savep); 153 tfu->fmt[p - savep] = '\0'; 154 escape(tfu->fmt); 155 p++; 156 } 157 } 158 159 static const char *spec = ".#-+ 0123456789"; 160 161 int 162 size(fs) 163 FS *fs; 164 { 165 FU *fu; 166 int bcnt, cursize; 167 char *fmt; 168 int prec; 169 170 /* figure out the data block size needed for each format unit */ 171 for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) { 172 if (fu->bcnt) { 173 cursize += fu->bcnt * fu->reps; 174 continue; 175 } 176 for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) { 177 if (*fmt != '%') 178 continue; 179 /* 180 * skip any special chars -- save precision in 181 * case it's a %s format. 182 */ 183 while (strchr(spec + 1, *++fmt)); 184 if (*fmt == '.' && isdigit((unsigned char)*++fmt)) { 185 prec = atoi(fmt); 186 while (isdigit((unsigned char)*++fmt)); 187 } 188 switch(*fmt) { 189 case 'c': 190 bcnt += 1; 191 break; 192 case 'd': case 'i': case 'o': case 'u': 193 case 'x': case 'X': 194 bcnt += 4; 195 break; 196 case 'e': case 'E': case 'f': case 'g': case 'G': 197 bcnt += 8; 198 break; 199 case 's': 200 bcnt += prec; 201 break; 202 case '_': 203 switch(*++fmt) { 204 case 'c': case 'p': case 'u': 205 bcnt += 1; 206 break; 207 } 208 } 209 } 210 cursize += bcnt * fu->reps; 211 } 212 return (cursize); 213 } 214 215 void 216 rewrite(fs) 217 FS *fs; 218 { 219 enum { NOTOKAY, USEBCNT, USEPREC } sokay; 220 PR *pr, **nextpr; 221 FU *fu; 222 char *p1, *p2; 223 char savech, *fmtp, cs[3]; 224 int nconv, prec; 225 226 nextpr = NULL; 227 prec = 0; 228 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 229 /* 230 * Break each format unit into print units; each conversion 231 * character gets its own. 232 */ 233 for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) { 234 pr = emalloc(sizeof(PR)); 235 if (!fu->nextpr) 236 fu->nextpr = pr; 237 else 238 *nextpr = pr; 239 240 /* Skip preceding text and up to the next % sign. */ 241 for (p1 = fmtp; *p1 && *p1 != '%'; ++p1); 242 243 /* Only text in the string. */ 244 if (!*p1) { 245 pr->fmt = fmtp; 246 pr->flags = F_TEXT; 247 break; 248 } 249 250 /* 251 * Get precision for %s -- if have a byte count, don't 252 * need it. 253 */ 254 if (fu->bcnt) { 255 sokay = USEBCNT; 256 /* Skip to conversion character. */ 257 for (++p1; strchr(spec, *p1); ++p1); 258 } else { 259 /* Skip any special chars, field width. */ 260 while (strchr(spec + 1, *++p1)); 261 if (*p1 == '.' && 262 isdigit((unsigned char)*++p1)) { 263 sokay = USEPREC; 264 prec = atoi(p1); 265 while (isdigit((unsigned char)*++p1)) 266 continue; 267 } else 268 sokay = NOTOKAY; 269 } 270 271 p2 = p1 + 1; /* Set end pointer. */ 272 cs[0] = *p1; /* Set conversion string. */ 273 cs[1] = '\0'; 274 275 /* 276 * Figure out the byte count for each conversion; 277 * rewrite the format as necessary, set up blank- 278 * padding for end of data. 279 */ 280 switch(cs[0]) { 281 case 'c': 282 pr->flags = F_CHAR; 283 switch(fu->bcnt) { 284 case 0: case 1: 285 pr->bcnt = 1; 286 break; 287 default: 288 p1[1] = '\0'; 289 badcnt(p1); 290 } 291 break; 292 case 'd': case 'i': 293 pr->flags = F_INT; 294 goto isint; 295 case 'o': case 'u': case 'x': case 'X': 296 pr->flags = F_UINT; 297 isint: cs[2] = '\0'; 298 cs[1] = cs[0]; 299 cs[0] = 'q'; 300 switch(fu->bcnt) { 301 case 0: case 4: 302 pr->bcnt = 4; 303 break; 304 case 1: 305 pr->bcnt = 1; 306 break; 307 case 2: 308 pr->bcnt = 2; 309 break; 310 default: 311 p1[1] = '\0'; 312 badcnt(p1); 313 } 314 break; 315 case 'e': case 'E': case 'f': case 'g': case 'G': 316 pr->flags = F_DBL; 317 switch(fu->bcnt) { 318 case 0: case 8: 319 pr->bcnt = 8; 320 break; 321 case 4: 322 pr->bcnt = 4; 323 break; 324 default: 325 p1[1] = '\0'; 326 badcnt(p1); 327 } 328 break; 329 case 's': 330 pr->flags = F_STR; 331 switch(sokay) { 332 case NOTOKAY: 333 badsfmt(); 334 case USEBCNT: 335 pr->bcnt = fu->bcnt; 336 break; 337 case USEPREC: 338 pr->bcnt = prec; 339 break; 340 } 341 break; 342 case '_': 343 ++p2; 344 switch(p1[1]) { 345 case 'A': 346 endfu = fu; 347 fu->flags |= F_IGNORE; 348 /* FALLTHROUGH */ 349 case 'a': 350 pr->flags = F_ADDRESS; 351 ++p2; 352 switch(p1[2]) { 353 case 'd': case 'o': case'x': 354 cs[0] = 'q'; 355 cs[1] = p1[2]; 356 cs[2] = '\0'; 357 break; 358 default: 359 p1[3] = '\0'; 360 badconv(p1); 361 } 362 break; 363 case 'c': 364 pr->flags = F_C; 365 /* cs[0] = 'c'; set in conv_c */ 366 goto isint2; 367 case 'p': 368 pr->flags = F_P; 369 cs[0] = 'c'; 370 goto isint2; 371 case 'u': 372 pr->flags = F_U; 373 /* cs[0] = 'c'; set in conv_u */ 374 isint2: switch(fu->bcnt) { 375 case 0: case 1: 376 pr->bcnt = 1; 377 break; 378 default: 379 p1[2] = '\0'; 380 badcnt(p1); 381 } 382 break; 383 default: 384 p1[2] = '\0'; 385 badconv(p1); 386 } 387 break; 388 default: 389 p1[1] = '\0'; 390 badconv(p1); 391 } 392 393 /* 394 * Copy to PR format string, set conversion character 395 * pointer, update original. 396 */ 397 savech = *p2; 398 p1[0] = '\0'; 399 pr->fmt = emalloc(strlen(fmtp) + strlen(cs) + 1); 400 (void)strcpy(pr->fmt, fmtp); 401 (void)strcat(pr->fmt, cs); 402 *p2 = savech; 403 pr->cchar = pr->fmt + (p1 - fmtp); 404 fmtp = p2; 405 406 /* Only one conversion character if byte count. */ 407 if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++) 408 errx(1, 409 "byte count with multiple conversion characters"); 410 } 411 /* 412 * If format unit byte count not specified, figure it out 413 * so can adjust rep count later. 414 */ 415 if (!fu->bcnt) 416 for (pr = fu->nextpr; pr; pr = pr->nextpr) 417 fu->bcnt += pr->bcnt; 418 } 419 /* 420 * If the format string interprets any data at all, and it's 421 * not the same as the blocksize, and its last format unit 422 * interprets any data at all, and has no iteration count, 423 * repeat it as necessary. 424 * 425 * If, rep count is greater than 1, no trailing whitespace 426 * gets output from the last iteration of the format unit. 427 */ 428 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 429 if (!fu->nextfu && fs->bcnt < blocksize && 430 !(fu->flags&F_SETREP) && fu->bcnt) 431 fu->reps += (blocksize - fs->bcnt) / fu->bcnt; 432 if (fu->reps > 1) { 433 for (pr = fu->nextpr;; pr = pr->nextpr) 434 if (!pr->nextpr) 435 break; 436 for (p1 = pr->fmt, p2 = NULL; *p1; ++p1) 437 p2 = isspace((unsigned char)*p1) ? p1 : NULL; 438 if (p2) 439 pr->nospace = p2; 440 } 441 } 442 #ifdef DEBUG 443 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 444 (void)printf("fmt:"); 445 for (pr = fu->nextpr; pr; pr = pr->nextpr) 446 (void)printf(" {%s}", pr->fmt); 447 (void)printf("\n"); 448 } 449 #endif 450 } 451 452 void 453 escape(p1) 454 char *p1; 455 { 456 char *p2; 457 458 /* alphabetic escape sequences have to be done in place */ 459 for (p2 = p1;; ++p1, ++p2) { 460 if (!*p1) { 461 *p2 = *p1; 462 break; 463 } 464 if (*p1 == '\\') 465 switch(*++p1) { 466 case 'a': 467 /* *p2 = '\a'; */ 468 *p2 = '\007'; 469 break; 470 case 'b': 471 *p2 = '\b'; 472 break; 473 case 'f': 474 *p2 = '\f'; 475 break; 476 case 'n': 477 *p2 = '\n'; 478 break; 479 case 'r': 480 *p2 = '\r'; 481 break; 482 case 't': 483 *p2 = '\t'; 484 break; 485 case 'v': 486 *p2 = '\v'; 487 break; 488 default: 489 *p2 = *p1; 490 break; 491 } 492 } 493 } 494 495 void 496 badcnt(s) 497 char *s; 498 { 499 errx(1, "%s: bad byte count", s); 500 } 501 502 void 503 badsfmt() 504 { 505 errx(1, "%%s: requires a precision or a byte count\n"); 506 } 507 508 void 509 badfmt(fmt) 510 const char *fmt; 511 { 512 errx(1, "\"%s\": bad format\n", fmt); 513 } 514 515 void 516 badconv(ch) 517 char *ch; 518 { 519 errx(1, "%%%s: bad conversion character\n", ch); 520 } 521